diff --git "a/.triton/dump/89f8cc1079aa03024e56dc2aee42813a/triton_.llir" "b/.triton/dump/89f8cc1079aa03024e56dc2aee42813a/triton_.llir" new file mode 100644--- /dev/null +++ "b/.triton/dump/89f8cc1079aa03024e56dc2aee42813a/triton_.llir" @@ -0,0 +1,1473 @@ +; ModuleID = 'LLVMDialectModule' +source_filename = "LLVMDialectModule" + +@assertFunc_0 = internal constant [25 x i8] c"_call_with_frames_removed" +@assertFile_0 = internal constant [38 x i8] c"" +@assertMessage_0 = internal constant [38 x i8] c"index out of bounds: 0 <= tmp7 < 50257" +@global_smem = external addrspace(3) global [0 x i8] +@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1 + +declare void @__assertfail(ptr, ptr, i32, ptr, i64) local_unnamed_addr + +define void @triton__0d1d2d3d4d5d6e7de(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, ptr addrspace(1) %5, i64 %6, i64 %7) local_unnamed_addr !dbg !7 { + %9 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10 + %urem = and i32 %9, 255, !dbg !10 + %10 = shl nuw nsw i32 %urem, 2, !dbg !10 + %11 = or i32 %10, 1, !dbg !10 + %12 = or i32 %10, 2, !dbg !10 + %13 = or i32 %10, 3, !dbg !10 + %14 = or i32 %urem, 256, !dbg !10 + %15 = or i32 %urem, 512, !dbg !10 + %16 = or i32 %urem, 768, !dbg !10 + %17 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #5, !dbg !11 + %18 = sext i32 %17 to i64, !dbg !12 + %19 = icmp slt i32 %17, 8, !dbg !13 + %20 = mul nsw i64 %18, 7680, !dbg !14 + %21 = mul nsw i64 %18, 385973760, !dbg !15 + %22 = getelementptr i64, ptr addrspace(1) %0, i64 %20 + %23 = shl nuw nsw i32 %urem, 3 + %24 = zext nneg i32 %23 to i64 + %25 = getelementptr float, ptr addrspace(3) @global_smem, i64 %24 + %26 = shl nuw nsw i32 %11, 1 + %27 = zext nneg i32 %26 to i64 + %28 = getelementptr float, ptr addrspace(3) @global_smem, i64 %27 + %29 = shl nuw nsw i32 %12, 1 + %30 = zext nneg i32 %29 to i64 + %31 = getelementptr float, ptr addrspace(3) @global_smem, i64 %30 + %32 = shl nuw nsw i32 %13, 1 + %33 = zext nneg i32 %32 to i64 + %34 = getelementptr float, ptr addrspace(3) @global_smem, i64 %33 + %35 = shl nuw nsw i32 %urem, 1 + %36 = zext nneg i32 %35 to i64 + %37 = getelementptr float, ptr addrspace(3) @global_smem, i64 %36 + %38 = shl nuw nsw i32 %14, 1 + %39 = zext nneg i32 %38 to i64 + %40 = getelementptr float, ptr addrspace(3) @global_smem, i64 %39 + %41 = shl nuw nsw i32 %15, 1 + %42 = zext nneg i32 %41 to i64 + %43 = getelementptr float, ptr addrspace(3) @global_smem, i64 %42 + %44 = shl nuw nsw i32 %16, 1 + %45 = zext nneg i32 %44 to i64 + %46 = getelementptr float, ptr addrspace(3) @global_smem, i64 %45 + %47 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %24 + %48 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %27 + %49 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %30 + %50 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %33 + %51 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %36 + %52 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %39 + %53 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %42 + %54 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %45 + %55 = zext nneg i32 %10 to i64 + %56 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %55 + %57 = zext nneg i32 %urem to i64 + %58 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %57 + %59 = zext nneg i32 %14 to i64 + %60 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %59 + %61 = zext nneg i32 %15 to i64 + %62 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %61 + %63 = zext nneg i32 %16 to i64 + %64 = getelementptr i8, ptr addrspace(3) @global_smem, i64 %63 + %65 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %24 + %66 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %27 + %67 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %30 + %68 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %33 + %69 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %36 + %70 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %39 + %71 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %42 + %72 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %45 + %73 = insertelement <8 x i1> poison, i1 %19, i64 0, !dbg !16 + br label %74, !dbg !17 + +74: ; preds = %8, %__nv_logf.exit239 + %75 = phi i32 [ 0, %8 ], [ %774, %__nv_logf.exit239 ] + %76 = phi <8 x float> [ zeroinitializer, %8 ], [ %768, %__nv_logf.exit239 ] + %77 = phi <8 x i64> [ zeroinitializer, %8 ], [ %773, %__nv_logf.exit239 ] + %78 = or i32 %75, %10, !dbg !18 + %79 = zext nneg i32 %78 to i64, !dbg !18 + %80 = or i32 %75, %11, !dbg !18 + %81 = zext nneg i32 %80 to i64, !dbg !18 + %82 = or i32 %75, %12, !dbg !18 + %83 = zext nneg i32 %82 to i64, !dbg !18 + %84 = or i32 %75, %13, !dbg !18 + %85 = zext nneg i32 %84 to i64, !dbg !18 + %86 = or i32 %78, 1024, !dbg !18 + %87 = zext nneg i32 %86 to i64, !dbg !18 + %88 = or i32 %78, 1025, !dbg !18 + %89 = zext nneg i32 %88 to i64, !dbg !18 + %90 = or i32 %78, 1026, !dbg !18 + %91 = zext nneg i32 %90 to i64, !dbg !18 + %92 = or i32 %78, 1027, !dbg !18 + %93 = zext nneg i32 %92 to i64, !dbg !18 + %94 = or i32 %75, 1536, !dbg !18 + %95 = icmp ult i32 %86, 7680, !dbg !19 + %96 = icmp ult i32 %94, 7680, !dbg !19 + %97 = add nsw i64 %20, %79, !dbg !20 + %98 = add nsw i64 %20, %87, !dbg !20 + %99 = getelementptr i64, ptr addrspace(1) %0, i64 %97, !dbg !21 + %100 = getelementptr i64, ptr addrspace(1) %22, i64 %83, !dbg !21 + %101 = getelementptr i64, ptr addrspace(1) %0, i64 %98, !dbg !21 + %102 = getelementptr i64, ptr addrspace(1) %22, i64 %91, !dbg !21 + %103 = and i1 %19, %95, !dbg !22 + %104 = and i1 %19, %96, !dbg !22 + %105 = tail call { i64, i64 } asm sideeffect "mov.u64 $0, 0x0;\0A\09mov.u64 $1, 0x0;\0A\09@$3 ld.global.L1::evict_first.v2.b64 { $0, $1 }, [ $2 + 0 ];\0A\09@!$4 mov.u64 $0, 0x0;\0A\09@!$5 mov.u64 $1, 0x0;", "=l,=l,l,b,b,b"(ptr addrspace(1) %99, i1 %19, i1 %19, i1 %19) #5, !dbg !23 + %106 = extractvalue { i64, i64 } %105, 0, !dbg !23 + %107 = extractvalue { i64, i64 } %105, 1, !dbg !23 + %108 = tail call { i64, i64 } asm sideeffect "mov.u64 $0, 0x0;\0A\09mov.u64 $1, 0x0;\0A\09@$3 ld.global.L1::evict_first.v2.b64 { $0, $1 }, [ $2 + 0 ];\0A\09@!$4 mov.u64 $0, 0x0;\0A\09@!$5 mov.u64 $1, 0x0;", "=l,=l,l,b,b,b"(ptr addrspace(1) %100, i1 %19, i1 %19, i1 %19) #5, !dbg !23 + %109 = extractvalue { i64, i64 } %108, 0, !dbg !23 + %110 = extractvalue { i64, i64 } %108, 1, !dbg !23 + %111 = tail call { i64, i64 } asm sideeffect "mov.u64 $0, 0x0;\0A\09mov.u64 $1, 0x0;\0A\09@$3 ld.global.L1::evict_first.v2.b64 { $0, $1 }, [ $2 + 0 ];\0A\09@!$4 mov.u64 $0, 0x0;\0A\09@!$5 mov.u64 $1, 0x0;", "=l,=l,l,b,b,b"(ptr addrspace(1) %101, i1 %103, i1 %103, i1 %103) #5, !dbg !23 + %112 = extractvalue { i64, i64 } %111, 0, !dbg !23 + %113 = extractvalue { i64, i64 } %111, 1, !dbg !23 + %114 = tail call { i64, i64 } asm sideeffect "mov.u64 $0, 0x0;\0A\09mov.u64 $1, 0x0;\0A\09@$3 ld.global.L1::evict_first.v2.b64 { $0, $1 }, [ $2 + 0 ];\0A\09@!$4 mov.u64 $0, 0x0;\0A\09@!$5 mov.u64 $1, 0x0;", "=l,=l,l,b,b,b"(ptr addrspace(1) %102, i1 %103, i1 %103, i1 %103) #5, !dbg !23 + %115 = extractvalue { i64, i64 } %114, 0, !dbg !23 + %116 = extractvalue { i64, i64 } %114, 1, !dbg !23 + %117 = getelementptr float, ptr addrspace(1) %2, i64 %97, !dbg !24 + %118 = getelementptr float, ptr addrspace(1) %2, i64 %98, !dbg !24 + %119 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_first.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %117, i1 %19, i32 0, i1 %19, i32 0, i1 %19, i32 0, i1 %19, i32 0, i1 %19) #5, !dbg !25 + %120 = extractvalue { i32, i32, i32, i32 } %119, 0, !dbg !25 + %121 = extractvalue { i32, i32, i32, i32 } %119, 1, !dbg !25 + %122 = extractvalue { i32, i32, i32, i32 } %119, 2, !dbg !25 + %123 = extractvalue { i32, i32, i32, i32 } %119, 3, !dbg !25 + %124 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_first.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %118, i1 %103, i32 0, i1 %103, i32 0, i1 %103, i32 0, i1 %103, i32 0, i1 %103) #5, !dbg !25 + %125 = extractvalue { i32, i32, i32, i32 } %124, 0, !dbg !25 + %126 = extractvalue { i32, i32, i32, i32 } %124, 1, !dbg !25 + %127 = extractvalue { i32, i32, i32, i32 } %124, 2, !dbg !25 + %128 = extractvalue { i32, i32, i32, i32 } %124, 3, !dbg !25 + tail call void @llvm.nvvm.barrier0(), !dbg !25 + store i32 %120, ptr addrspace(3) %25, align 4, !dbg !25 + store i32 %121, ptr addrspace(3) %28, align 4, !dbg !25 + store i32 %122, ptr addrspace(3) %31, align 4, !dbg !25 + store i32 %123, ptr addrspace(3) %34, align 4, !dbg !25 + tail call void @llvm.nvvm.barrier0(), !dbg !25 + %129 = load float, ptr addrspace(3) %37, align 4, !dbg !25 + %130 = load float, ptr addrspace(3) %40, align 4, !dbg !25 + %131 = load float, ptr addrspace(3) %43, align 4, !dbg !25 + %132 = load float, ptr addrspace(3) %46, align 4, !dbg !25 + tail call void @llvm.nvvm.barrier0(), !dbg !25 + store i32 %125, ptr addrspace(3) %25, align 4, !dbg !25 + store i32 %126, ptr addrspace(3) %28, align 4, !dbg !25 + store i32 %127, ptr addrspace(3) %31, align 4, !dbg !25 + store i32 %128, ptr addrspace(3) %34, align 4, !dbg !25 + tail call void @llvm.nvvm.barrier0(), !dbg !25 + %133 = load float, ptr addrspace(3) %37, align 4, !dbg !25 + %134 = load float, ptr addrspace(3) %40, align 4, !dbg !25 + %135 = load float, ptr addrspace(3) %43, align 4, !dbg !25 + %136 = load float, ptr addrspace(3) %46, align 4, !dbg !25 + %137 = getelementptr float, ptr addrspace(1) %3, i64 %97, !dbg !26 + %138 = getelementptr float, ptr addrspace(1) %3, i64 %98, !dbg !26 + %139 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_first.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %137, i1 %19, i32 0, i1 %19, i32 0, i1 %19, i32 0, i1 %19, i32 0, i1 %19) #5, !dbg !27 + %140 = extractvalue { i32, i32, i32, i32 } %139, 0, !dbg !27 + %141 = extractvalue { i32, i32, i32, i32 } %139, 1, !dbg !27 + %142 = extractvalue { i32, i32, i32, i32 } %139, 2, !dbg !27 + %143 = extractvalue { i32, i32, i32, i32 } %139, 3, !dbg !27 + %144 = bitcast i32 %140 to float, !dbg !27 + %145 = bitcast i32 %141 to float, !dbg !27 + %146 = bitcast i32 %142 to float, !dbg !27 + %147 = bitcast i32 %143 to float, !dbg !27 + %148 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_first.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %138, i1 %103, i32 0, i1 %103, i32 0, i1 %103, i32 0, i1 %103, i32 0, i1 %103) #5, !dbg !27 + %149 = extractvalue { i32, i32, i32, i32 } %148, 0, !dbg !27 + %150 = extractvalue { i32, i32, i32, i32 } %148, 1, !dbg !27 + %151 = extractvalue { i32, i32, i32, i32 } %148, 2, !dbg !27 + %152 = extractvalue { i32, i32, i32, i32 } %148, 3, !dbg !27 + %153 = bitcast i32 %149 to float, !dbg !27 + %154 = bitcast i32 %150 to float, !dbg !27 + %155 = bitcast i32 %151 to float, !dbg !27 + %156 = bitcast i32 %152 to float, !dbg !27 + %157 = insertelement <8 x i64> poison, i64 %106, i64 0, !dbg !28 + %158 = insertelement <8 x i64> %157, i64 %107, i64 1, !dbg !28 + %159 = insertelement <8 x i64> %158, i64 %109, i64 2, !dbg !28 + %160 = insertelement <8 x i64> %159, i64 %110, i64 3, !dbg !28 + %161 = insertelement <8 x i64> %160, i64 %112, i64 4, !dbg !28 + %162 = insertelement <8 x i64> %161, i64 %113, i64 5, !dbg !28 + %163 = insertelement <8 x i64> %162, i64 %115, i64 6, !dbg !28 + %164 = insertelement <8 x i64> %163, i64 %116, i64 7, !dbg !28 + %165 = icmp ne <8 x i64> %164, , !dbg !28 + tail call void @llvm.nvvm.barrier0(), !dbg !28 + %166 = extractelement <8 x i1> %165, i64 0, !dbg !29 + %167 = zext i1 %166 to i8, !dbg !28 + %168 = insertelement <1 x i8> undef, i8 %167, i64 0, !dbg !28 + store <1 x i8> %168, ptr addrspace(3) %47, align 1, !dbg !28 + %169 = extractelement <8 x i1> %165, i64 1, !dbg !29 + %170 = zext i1 %169 to i8, !dbg !28 + %171 = insertelement <1 x i8> undef, i8 %170, i64 0, !dbg !28 + store <1 x i8> %171, ptr addrspace(3) %48, align 1, !dbg !28 + %172 = extractelement <8 x i1> %165, i64 2, !dbg !29 + %173 = zext i1 %172 to i8, !dbg !28 + %174 = insertelement <1 x i8> undef, i8 %173, i64 0, !dbg !28 + store <1 x i8> %174, ptr addrspace(3) %49, align 1, !dbg !28 + %175 = extractelement <8 x i1> %165, i64 3, !dbg !29 + %176 = zext i1 %175 to i8, !dbg !28 + %177 = insertelement <1 x i8> undef, i8 %176, i64 0, !dbg !28 + store <1 x i8> %177, ptr addrspace(3) %50, align 1, !dbg !28 + tail call void @llvm.nvvm.barrier0(), !dbg !28 + %178 = load i8, ptr addrspace(3) %51, align 1, !dbg !28 + %179 = load i8, ptr addrspace(3) %52, align 1, !dbg !28 + %180 = load i8, ptr addrspace(3) %53, align 1, !dbg !28 + %181 = load i8, ptr addrspace(3) %54, align 1, !dbg !28 + tail call void @llvm.nvvm.barrier0(), !dbg !28 + %182 = extractelement <8 x i1> %165, i64 4, !dbg !29 + %183 = zext i1 %182 to i8, !dbg !28 + %184 = insertelement <1 x i8> undef, i8 %183, i64 0, !dbg !28 + store <1 x i8> %184, ptr addrspace(3) %47, align 1, !dbg !28 + %185 = extractelement <8 x i1> %165, i64 5, !dbg !29 + %186 = zext i1 %185 to i8, !dbg !28 + %187 = insertelement <1 x i8> undef, i8 %186, i64 0, !dbg !28 + store <1 x i8> %187, ptr addrspace(3) %48, align 1, !dbg !28 + %188 = extractelement <8 x i1> %165, i64 6, !dbg !29 + %189 = zext i1 %188 to i8, !dbg !28 + %190 = insertelement <1 x i8> undef, i8 %189, i64 0, !dbg !28 + store <1 x i8> %190, ptr addrspace(3) %49, align 1, !dbg !28 + %191 = extractelement <8 x i1> %165, i64 7, !dbg !29 + %192 = zext i1 %191 to i8, !dbg !28 + %193 = insertelement <1 x i8> undef, i8 %192, i64 0, !dbg !28 + store <1 x i8> %193, ptr addrspace(3) %50, align 1, !dbg !28 + tail call void @llvm.nvvm.barrier0(), !dbg !28 + %194 = load i8, ptr addrspace(3) %51, align 1, !dbg !28 + %195 = load i8, ptr addrspace(3) %52, align 1, !dbg !28 + %196 = load i8, ptr addrspace(3) %53, align 1, !dbg !28 + %197 = load i8, ptr addrspace(3) %54, align 1, !dbg !28 + %198 = insertelement <8 x i8> poison, i8 %178, i64 0, !dbg !28 + %199 = insertelement <8 x i8> %198, i8 %179, i64 1, !dbg !28 + %200 = insertelement <8 x i8> %199, i8 %180, i64 2, !dbg !28 + %201 = insertelement <8 x i8> %200, i8 %181, i64 3, !dbg !28 + %202 = insertelement <8 x i8> %201, i8 %194, i64 4, !dbg !28 + %203 = insertelement <8 x i8> %202, i8 %195, i64 5, !dbg !28 + %204 = insertelement <8 x i8> %203, i8 %196, i64 6, !dbg !28 + %205 = insertelement <8 x i8> %204, i8 %197, i64 7, !dbg !28 + %206 = icmp eq <8 x i8> %205, zeroinitializer, !dbg !28 + tail call void @llvm.nvvm.barrier0(), !dbg !30 + %207 = shufflevector <8 x i1> %165, <8 x i1> poison, <4 x i32> , !dbg !29 + %208 = insertelement <4 x i64> poison, i64 %106, i64 0, !dbg !29 + %209 = insertelement <4 x i64> %208, i64 %107, i64 1, !dbg !29 + %210 = insertelement <4 x i64> %209, i64 %109, i64 2, !dbg !29 + %211 = insertelement <4 x i64> %210, i64 %110, i64 3, !dbg !29 + %212 = select <4 x i1> %207, <4 x i64> %211, <4 x i64> zeroinitializer, !dbg !29 + %213 = add <4 x i64> %212, , !dbg !31 + %214 = icmp slt <4 x i64> %212, zeroinitializer, !dbg !32 + %215 = select <4 x i1> %214, <4 x i64> %213, <4 x i64> %212, !dbg !33 + %216 = icmp ult <4 x i64> %215, , !dbg !30 + %217 = zext <4 x i1> %216 to <4 x i8>, !dbg !30 + store <4 x i8> %217, ptr addrspace(3) %56, align 4, !dbg !30 + tail call void @llvm.nvvm.barrier0(), !dbg !30 + %218 = load i8, ptr addrspace(3) %58, align 1, !dbg !30 + %219 = load i8, ptr addrspace(3) %60, align 1, !dbg !30 + %220 = load i8, ptr addrspace(3) %62, align 1, !dbg !30 + %221 = load i8, ptr addrspace(3) %64, align 1, !dbg !30 + tail call void @llvm.nvvm.barrier0(), !dbg !30 + %222 = shufflevector <8 x i1> %165, <8 x i1> poison, <4 x i32> , !dbg !29 + %223 = insertelement <4 x i64> poison, i64 %112, i64 0, !dbg !29 + %224 = insertelement <4 x i64> %223, i64 %113, i64 1, !dbg !29 + %225 = insertelement <4 x i64> %224, i64 %115, i64 2, !dbg !29 + %226 = insertelement <4 x i64> %225, i64 %116, i64 3, !dbg !29 + %227 = select <4 x i1> %222, <4 x i64> %226, <4 x i64> zeroinitializer, !dbg !29 + %228 = add <4 x i64> %227, , !dbg !31 + %229 = icmp slt <4 x i64> %227, zeroinitializer, !dbg !32 + %230 = select <4 x i1> %229, <4 x i64> %228, <4 x i64> %227, !dbg !33 + %231 = icmp ult <4 x i64> %230, , !dbg !30 + %232 = zext <4 x i1> %231 to <4 x i8>, !dbg !30 + store <4 x i8> %232, ptr addrspace(3) %56, align 4, !dbg !30 + tail call void @llvm.nvvm.barrier0(), !dbg !30 + %233 = load i8, ptr addrspace(3) %58, align 1, !dbg !30 + %234 = load i8, ptr addrspace(3) %60, align 1, !dbg !30 + %235 = load i8, ptr addrspace(3) %62, align 1, !dbg !30 + %236 = load i8, ptr addrspace(3) %64, align 1, !dbg !30 + %237 = insertelement <8 x i8> poison, i8 %219, i64 0, !dbg !30 + %238 = insertelement <8 x i8> %237, i8 %218, i64 1, !dbg !30 + %239 = insertelement <8 x i8> %238, i8 %220, i64 2, !dbg !30 + %240 = insertelement <8 x i8> %239, i8 %221, i64 3, !dbg !30 + %241 = insertelement <8 x i8> %240, i8 %233, i64 4, !dbg !30 + %242 = insertelement <8 x i8> %241, i8 %234, i64 5, !dbg !30 + %243 = insertelement <8 x i8> %242, i8 %235, i64 6, !dbg !30 + %244 = insertelement <8 x i8> %243, i8 %236, i64 7, !dbg !30 + %245 = icmp eq <8 x i8> %244, zeroinitializer, !dbg !30 + %246 = bitcast <8 x i1> %245 to i8, !dbg !34 + %.not = icmp eq i8 %246, 0, !dbg !34 + br i1 %.not, label %248, label %247, !dbg !34 + +247: ; preds = %74 + tail call void @__assertfail(ptr nonnull @assertMessage_0, ptr nonnull @assertFile_0, i32 883, ptr nonnull @assertFunc_0, i64 1), !dbg !34 + br label %248, !dbg !34 + +248: ; preds = %247, %74 + %249 = mul nuw nsw i64 %79, 50257, !dbg !35 + %250 = mul nuw nsw i64 %81, 50257, !dbg !35 + %251 = mul nuw nsw i64 %83, 50257, !dbg !35 + %252 = mul nuw nsw i64 %85, 50257, !dbg !35 + %253 = mul nuw nsw i64 %87, 50257, !dbg !35 + %254 = mul nuw nsw i64 %89, 50257, !dbg !35 + %255 = mul nuw nsw i64 %91, 50257, !dbg !35 + %256 = mul nuw nsw i64 %93, 50257, !dbg !35 + %257 = extractelement <4 x i64> %215, i64 0, !dbg !36 + %258 = getelementptr i16, ptr addrspace(1) %1, i64 %257, !dbg !36 + %259 = getelementptr i16, ptr addrspace(1) %258, i64 %249, !dbg !36 + %260 = getelementptr i16, ptr addrspace(1) %259, i64 %21, !dbg !36 + %261 = extractelement <4 x i64> %215, i64 1, !dbg !36 + %262 = getelementptr i16, ptr addrspace(1) %1, i64 %261, !dbg !36 + %263 = getelementptr i16, ptr addrspace(1) %262, i64 %250, !dbg !36 + %264 = getelementptr i16, ptr addrspace(1) %263, i64 %21, !dbg !36 + %265 = extractelement <4 x i64> %215, i64 2, !dbg !36 + %266 = getelementptr i16, ptr addrspace(1) %1, i64 %265, !dbg !36 + %267 = getelementptr i16, ptr addrspace(1) %266, i64 %251, !dbg !36 + %268 = getelementptr i16, ptr addrspace(1) %267, i64 %21, !dbg !36 + %269 = extractelement <4 x i64> %215, i64 3, !dbg !36 + %270 = getelementptr i16, ptr addrspace(1) %1, i64 %269, !dbg !36 + %271 = getelementptr i16, ptr addrspace(1) %270, i64 %252, !dbg !36 + %272 = getelementptr i16, ptr addrspace(1) %271, i64 %21, !dbg !36 + %273 = extractelement <4 x i64> %230, i64 0, !dbg !36 + %274 = getelementptr i16, ptr addrspace(1) %1, i64 %273, !dbg !36 + %275 = getelementptr i16, ptr addrspace(1) %274, i64 %253, !dbg !36 + %276 = getelementptr i16, ptr addrspace(1) %275, i64 %21, !dbg !36 + %277 = extractelement <4 x i64> %230, i64 1, !dbg !36 + %278 = getelementptr i16, ptr addrspace(1) %1, i64 %277, !dbg !36 + %279 = getelementptr i16, ptr addrspace(1) %278, i64 %254, !dbg !36 + %280 = getelementptr i16, ptr addrspace(1) %279, i64 %21, !dbg !36 + %281 = extractelement <4 x i64> %230, i64 2, !dbg !36 + %282 = getelementptr i16, ptr addrspace(1) %1, i64 %281, !dbg !36 + %283 = getelementptr i16, ptr addrspace(1) %282, i64 %255, !dbg !36 + %284 = getelementptr i16, ptr addrspace(1) %283, i64 %21, !dbg !36 + %285 = extractelement <4 x i64> %230, i64 3, !dbg !36 + %286 = getelementptr i16, ptr addrspace(1) %1, i64 %285, !dbg !36 + %287 = getelementptr i16, ptr addrspace(1) %286, i64 %256, !dbg !36 + %288 = getelementptr i16, ptr addrspace(1) %287, i64 %21, !dbg !36 + tail call void @llvm.nvvm.barrier0(), !dbg !37 + %289 = ptrtoint ptr addrspace(1) %260 to i64, !dbg !37 + %290 = insertelement <1 x i64> undef, i64 %289, i64 0, !dbg !37 + store <1 x i64> %290, ptr addrspace(3) %65, align 8, !dbg !37 + %291 = ptrtoint ptr addrspace(1) %264 to i64, !dbg !37 + %292 = insertelement <1 x i64> undef, i64 %291, i64 0, !dbg !37 + store <1 x i64> %292, ptr addrspace(3) %66, align 8, !dbg !37 + %293 = ptrtoint ptr addrspace(1) %268 to i64, !dbg !37 + %294 = insertelement <1 x i64> undef, i64 %293, i64 0, !dbg !37 + store <1 x i64> %294, ptr addrspace(3) %67, align 8, !dbg !37 + %295 = ptrtoint ptr addrspace(1) %272 to i64, !dbg !37 + %296 = insertelement <1 x i64> undef, i64 %295, i64 0, !dbg !37 + store <1 x i64> %296, ptr addrspace(3) %68, align 8, !dbg !37 + tail call void @llvm.nvvm.barrier0(), !dbg !37 + %297 = load i64, ptr addrspace(3) %69, align 8, !dbg !37 + %298 = inttoptr i64 %297 to ptr addrspace(1), !dbg !37 + %299 = load i64, ptr addrspace(3) %70, align 8, !dbg !37 + %300 = inttoptr i64 %299 to ptr addrspace(1), !dbg !37 + %301 = load i64, ptr addrspace(3) %71, align 8, !dbg !37 + %302 = inttoptr i64 %301 to ptr addrspace(1), !dbg !37 + %303 = load i64, ptr addrspace(3) %72, align 8, !dbg !37 + %304 = inttoptr i64 %303 to ptr addrspace(1), !dbg !37 + tail call void @llvm.nvvm.barrier0(), !dbg !37 + %305 = ptrtoint ptr addrspace(1) %276 to i64, !dbg !37 + %306 = insertelement <1 x i64> undef, i64 %305, i64 0, !dbg !37 + store <1 x i64> %306, ptr addrspace(3) %65, align 8, !dbg !37 + %307 = ptrtoint ptr addrspace(1) %280 to i64, !dbg !37 + %308 = insertelement <1 x i64> undef, i64 %307, i64 0, !dbg !37 + store <1 x i64> %308, ptr addrspace(3) %66, align 8, !dbg !37 + %309 = ptrtoint ptr addrspace(1) %284 to i64, !dbg !37 + %310 = insertelement <1 x i64> undef, i64 %309, i64 0, !dbg !37 + store <1 x i64> %310, ptr addrspace(3) %67, align 8, !dbg !37 + %311 = ptrtoint ptr addrspace(1) %288 to i64, !dbg !37 + %312 = insertelement <1 x i64> undef, i64 %311, i64 0, !dbg !37 + store <1 x i64> %312, ptr addrspace(3) %68, align 8, !dbg !37 + tail call void @llvm.nvvm.barrier0(), !dbg !37 + %313 = load i64, ptr addrspace(3) %69, align 8, !dbg !37 + %314 = inttoptr i64 %313 to ptr addrspace(1), !dbg !37 + %315 = load i64, ptr addrspace(3) %70, align 8, !dbg !37 + %316 = inttoptr i64 %315 to ptr addrspace(1), !dbg !37 + %317 = load i64, ptr addrspace(3) %71, align 8, !dbg !37 + %318 = inttoptr i64 %317 to ptr addrspace(1), !dbg !37 + %319 = load i64, ptr addrspace(3) %72, align 8, !dbg !37 + %320 = inttoptr i64 %319 to ptr addrspace(1), !dbg !37 + %321 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %298, i1 %19, i16 0, i1 %19) #5, !dbg !37 + %322 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %300, i1 %19, i16 0, i1 %19) #5, !dbg !37 + %323 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %302, i1 %19, i16 0, i1 %19) #5, !dbg !37 + %324 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %304, i1 %19, i16 0, i1 %19) #5, !dbg !37 + %325 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %314, i1 %19, i16 0, i1 %19) #5, !dbg !37 + %326 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %316, i1 %19, i16 0, i1 %19) #5, !dbg !37 + %327 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %318, i1 %104, i16 0, i1 %104) #5, !dbg !37 + %328 = tail call i16 asm sideeffect "mov.u16 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b16 { $0 }, [ $1 + 0 ];\0A\09@!$4 mov.u16 $0, $3;", "=c,l,b,c,b"(ptr addrspace(1) %320, i1 %104, i16 0, i1 %104) #5, !dbg !37 + %329 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %321) #5, !dbg !38 + %330 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %322) #5, !dbg !38 + %331 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %323) #5, !dbg !38 + %332 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %324) #5, !dbg !38 + %333 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %325) #5, !dbg !38 + %334 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %326) #5, !dbg !38 + %335 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %327) #5, !dbg !38 + %336 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %328) #5, !dbg !38 + %337 = insertelement <8 x float> poison, float %329, i64 0, !dbg !39 + %338 = insertelement <8 x float> %337, float %330, i64 1, !dbg !39 + %339 = insertelement <8 x float> %338, float %331, i64 2, !dbg !39 + %340 = insertelement <8 x float> %339, float %332, i64 3, !dbg !39 + %341 = insertelement <8 x float> %340, float %333, i64 4, !dbg !39 + %342 = insertelement <8 x float> %341, float %334, i64 5, !dbg !39 + %343 = insertelement <8 x float> %342, float %335, i64 6, !dbg !39 + %344 = insertelement <8 x float> %343, float %336, i64 7, !dbg !39 + %345 = insertelement <8 x float> poison, float %129, i64 0, !dbg !39 + %346 = insertelement <8 x float> %345, float %130, i64 1, !dbg !39 + %347 = insertelement <8 x float> %346, float %131, i64 2, !dbg !39 + %348 = insertelement <8 x float> %347, float %132, i64 3, !dbg !39 + %349 = insertelement <8 x float> %348, float %133, i64 4, !dbg !39 + %350 = insertelement <8 x float> %349, float %134, i64 5, !dbg !39 + %351 = insertelement <8 x float> %350, float %135, i64 6, !dbg !39 + %352 = insertelement <8 x float> %351, float %136, i64 7, !dbg !39 + %353 = fsub <8 x float> %344, %352, !dbg !39 + %354 = fcmp olt float %144, 0x3810000000000000, !dbg !40 + %355 = fmul float %144, 0x4160000000000000, !dbg !40 + %.02.i = select i1 %354, float %355, float %144, !dbg !40 + %i.i.0.i = select i1 %354, float -2.300000e+01, float 0.000000e+00, !dbg !40 + %356 = bitcast float %.02.i to i32, !dbg !40 + %357 = add i32 %356, -1059760811, !dbg !40 + %358 = and i32 %357, -8388608, !dbg !40 + %359 = sub i32 %356, %358, !dbg !40 + %360 = bitcast i32 %359 to float, !dbg !40 + %361 = sitofp i32 %358 to float, !dbg !40 + %362 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not.i = icmp eq i32 %362, 0, !dbg !40 + %363 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %361, float 0x3E80000000000000, float %i.i.0.i) #5, !dbg !40 + %364 = tail call float @llvm.nvvm.fma.rn.f(float %361, float 0x3E80000000000000, float %i.i.0.i) #5, !dbg !40 + %.08.i = select i1 %.not.i, float %364, float %363, !dbg !40 + %365 = fadd float %360, -1.000000e+00, !dbg !40 + %366 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not1.i = icmp eq i32 %366, 0, !dbg !40 + %367 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %365, float 0x3FC2073EC0000000) #5, !dbg !40 + %368 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %365, float 0x3FC2073EC0000000) #5, !dbg !40 + %.010.i = select i1 %.not1.i, float %368, float %367, !dbg !40 + %369 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not2.i = icmp eq i32 %369, 0, !dbg !40 + %370 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i, float %365, float 0xBFBF19B980000000) #5, !dbg !40 + %371 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i, float %365, float 0xBFBF19B980000000) #5, !dbg !40 + %.011.i = select i1 %.not2.i, float %371, float %370, !dbg !40 + %372 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not3.i = icmp eq i32 %372, 0, !dbg !40 + %373 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i, float %365, float 0x3FC1E52AA0000000) #5, !dbg !40 + %374 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i, float %365, float 0x3FC1E52AA0000000) #5, !dbg !40 + %.012.i = select i1 %.not3.i, float %374, float %373, !dbg !40 + %375 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not4.i = icmp eq i32 %375, 0, !dbg !40 + %376 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i, float %365, float 0xBFC55B1720000000) #5, !dbg !40 + %377 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i, float %365, float 0xBFC55B1720000000) #5, !dbg !40 + %.09.i = select i1 %.not4.i, float %377, float %376, !dbg !40 + %378 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not5.i = icmp eq i32 %378, 0, !dbg !40 + %379 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i, float %365, float 0x3FC99DA160000000) #5, !dbg !40 + %380 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i, float %365, float 0x3FC99DA160000000) #5, !dbg !40 + %.05.i = select i1 %.not5.i, float %380, float %379, !dbg !40 + %381 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not6.i = icmp eq i32 %381, 0, !dbg !40 + %382 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i, float %365, float 0xBFCFFFE440000000) #5, !dbg !40 + %383 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i, float %365, float 0xBFCFFFE440000000) #5, !dbg !40 + %.01.i = select i1 %.not6.i, float %383, float %382, !dbg !40 + %384 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not7.i = icmp eq i32 %384, 0, !dbg !40 + %385 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i, float %365, float 0x3FD5554F00000000) #5, !dbg !40 + %386 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i, float %365, float 0x3FD5554F00000000) #5, !dbg !40 + %.0.i = select i1 %.not7.i, float %386, float %385, !dbg !40 + %387 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not8.i = icmp eq i32 %387, 0, !dbg !40 + %388 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i, float %365, float -5.000000e-01) #5, !dbg !40 + %389 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i, float %365, float -5.000000e-01) #5, !dbg !40 + %.07.i = select i1 %.not8.i, float %389, float %388, !dbg !40 + %390 = fmul float %365, %.07.i, !dbg !40 + %391 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not9.i = icmp eq i32 %391, 0, !dbg !40 + %392 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %390, float %365, float %365) #5, !dbg !40 + %393 = tail call float @llvm.nvvm.fma.rn.f(float %390, float %365, float %365) #5, !dbg !40 + %.06.i = select i1 %.not9.i, float %393, float %392, !dbg !40 + %394 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not10.i = icmp eq i32 %394, 0, !dbg !40 + %395 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i, float 0x3FE62E4300000000, float %.06.i) #5, !dbg !40 + %396 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i, float 0x3FE62E4300000000, float %.06.i) #5, !dbg !40 + %.04.i = select i1 %.not10.i, float %396, float %395, !dbg !40 + %397 = icmp ugt i32 %356, 2139095039, !dbg !40 + br i1 %397, label %__nv_fmaf_rn.exit.i.i, label %__nv_logf.exit, !dbg !40 + +__nv_fmaf_rn.exit.i.i: ; preds = %248 + %398 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not11.i = icmp eq i32 %398, 0, !dbg !40 + %399 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %400 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %.03.i = select i1 %.not11.i, float %400, float %399, !dbg !40 + br label %__nv_logf.exit, !dbg !40 + +__nv_logf.exit: ; preds = %248, %__nv_fmaf_rn.exit.i.i + %r.i.0.i = phi float [ %.03.i, %__nv_fmaf_rn.exit.i.i ], [ %.04.i, %248 ], !dbg !40 + %401 = fcmp oeq float %.02.i, 0.000000e+00, !dbg !40 + %r.i.1.i = select i1 %401, float 0xFFF0000000000000, float %r.i.0.i, !dbg !40 + %402 = fcmp olt float %145, 0x3810000000000000, !dbg !40 + %403 = fmul float %145, 0x4160000000000000, !dbg !40 + %.02.i30 = select i1 %402, float %403, float %145, !dbg !40 + %i.i.0.i31 = select i1 %402, float -2.300000e+01, float 0.000000e+00, !dbg !40 + %404 = bitcast float %.02.i30 to i32, !dbg !40 + %405 = add i32 %404, -1059760811, !dbg !40 + %406 = and i32 %405, -8388608, !dbg !40 + %407 = sub i32 %404, %406, !dbg !40 + %408 = bitcast i32 %407 to float, !dbg !40 + %409 = sitofp i32 %406 to float, !dbg !40 + %410 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not.i32 = icmp eq i32 %410, 0, !dbg !40 + %411 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %409, float 0x3E80000000000000, float %i.i.0.i31) #5, !dbg !40 + %412 = tail call float @llvm.nvvm.fma.rn.f(float %409, float 0x3E80000000000000, float %i.i.0.i31) #5, !dbg !40 + %.08.i33 = select i1 %.not.i32, float %412, float %411, !dbg !40 + %413 = fadd float %408, -1.000000e+00, !dbg !40 + %414 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not1.i34 = icmp eq i32 %414, 0, !dbg !40 + %415 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %413, float 0x3FC2073EC0000000) #5, !dbg !40 + %416 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %413, float 0x3FC2073EC0000000) #5, !dbg !40 + %.010.i35 = select i1 %.not1.i34, float %416, float %415, !dbg !40 + %417 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not2.i36 = icmp eq i32 %417, 0, !dbg !40 + %418 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i35, float %413, float 0xBFBF19B980000000) #5, !dbg !40 + %419 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i35, float %413, float 0xBFBF19B980000000) #5, !dbg !40 + %.011.i37 = select i1 %.not2.i36, float %419, float %418, !dbg !40 + %420 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not3.i38 = icmp eq i32 %420, 0, !dbg !40 + %421 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i37, float %413, float 0x3FC1E52AA0000000) #5, !dbg !40 + %422 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i37, float %413, float 0x3FC1E52AA0000000) #5, !dbg !40 + %.012.i39 = select i1 %.not3.i38, float %422, float %421, !dbg !40 + %423 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not4.i40 = icmp eq i32 %423, 0, !dbg !40 + %424 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i39, float %413, float 0xBFC55B1720000000) #5, !dbg !40 + %425 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i39, float %413, float 0xBFC55B1720000000) #5, !dbg !40 + %.09.i41 = select i1 %.not4.i40, float %425, float %424, !dbg !40 + %426 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not5.i42 = icmp eq i32 %426, 0, !dbg !40 + %427 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i41, float %413, float 0x3FC99DA160000000) #5, !dbg !40 + %428 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i41, float %413, float 0x3FC99DA160000000) #5, !dbg !40 + %.05.i43 = select i1 %.not5.i42, float %428, float %427, !dbg !40 + %429 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not6.i44 = icmp eq i32 %429, 0, !dbg !40 + %430 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i43, float %413, float 0xBFCFFFE440000000) #5, !dbg !40 + %431 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i43, float %413, float 0xBFCFFFE440000000) #5, !dbg !40 + %.01.i45 = select i1 %.not6.i44, float %431, float %430, !dbg !40 + %432 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not7.i46 = icmp eq i32 %432, 0, !dbg !40 + %433 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i45, float %413, float 0x3FD5554F00000000) #5, !dbg !40 + %434 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i45, float %413, float 0x3FD5554F00000000) #5, !dbg !40 + %.0.i47 = select i1 %.not7.i46, float %434, float %433, !dbg !40 + %435 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not8.i48 = icmp eq i32 %435, 0, !dbg !40 + %436 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i47, float %413, float -5.000000e-01) #5, !dbg !40 + %437 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i47, float %413, float -5.000000e-01) #5, !dbg !40 + %.07.i49 = select i1 %.not8.i48, float %437, float %436, !dbg !40 + %438 = fmul float %413, %.07.i49, !dbg !40 + %439 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not9.i50 = icmp eq i32 %439, 0, !dbg !40 + %440 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %438, float %413, float %413) #5, !dbg !40 + %441 = tail call float @llvm.nvvm.fma.rn.f(float %438, float %413, float %413) #5, !dbg !40 + %.06.i51 = select i1 %.not9.i50, float %441, float %440, !dbg !40 + %442 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not10.i52 = icmp eq i32 %442, 0, !dbg !40 + %443 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i33, float 0x3FE62E4300000000, float %.06.i51) #5, !dbg !40 + %444 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i33, float 0x3FE62E4300000000, float %.06.i51) #5, !dbg !40 + %.04.i53 = select i1 %.not10.i52, float %444, float %443, !dbg !40 + %445 = icmp ugt i32 %404, 2139095039, !dbg !40 + br i1 %445, label %__nv_fmaf_rn.exit.i.i56, label %__nv_logf.exit59, !dbg !40 + +__nv_fmaf_rn.exit.i.i56: ; preds = %__nv_logf.exit + %446 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not11.i57 = icmp eq i32 %446, 0, !dbg !40 + %447 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i30, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %448 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i30, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %.03.i58 = select i1 %.not11.i57, float %448, float %447, !dbg !40 + br label %__nv_logf.exit59, !dbg !40 + +__nv_logf.exit59: ; preds = %__nv_logf.exit, %__nv_fmaf_rn.exit.i.i56 + %r.i.0.i54 = phi float [ %.03.i58, %__nv_fmaf_rn.exit.i.i56 ], [ %.04.i53, %__nv_logf.exit ], !dbg !40 + %449 = fcmp oeq float %.02.i30, 0.000000e+00, !dbg !40 + %r.i.1.i55 = select i1 %449, float 0xFFF0000000000000, float %r.i.0.i54, !dbg !40 + %450 = fcmp olt float %146, 0x3810000000000000, !dbg !40 + %451 = fmul float %146, 0x4160000000000000, !dbg !40 + %.02.i60 = select i1 %450, float %451, float %146, !dbg !40 + %i.i.0.i61 = select i1 %450, float -2.300000e+01, float 0.000000e+00, !dbg !40 + %452 = bitcast float %.02.i60 to i32, !dbg !40 + %453 = add i32 %452, -1059760811, !dbg !40 + %454 = and i32 %453, -8388608, !dbg !40 + %455 = sub i32 %452, %454, !dbg !40 + %456 = bitcast i32 %455 to float, !dbg !40 + %457 = sitofp i32 %454 to float, !dbg !40 + %458 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not.i62 = icmp eq i32 %458, 0, !dbg !40 + %459 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %457, float 0x3E80000000000000, float %i.i.0.i61) #5, !dbg !40 + %460 = tail call float @llvm.nvvm.fma.rn.f(float %457, float 0x3E80000000000000, float %i.i.0.i61) #5, !dbg !40 + %.08.i63 = select i1 %.not.i62, float %460, float %459, !dbg !40 + %461 = fadd float %456, -1.000000e+00, !dbg !40 + %462 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not1.i64 = icmp eq i32 %462, 0, !dbg !40 + %463 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %461, float 0x3FC2073EC0000000) #5, !dbg !40 + %464 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %461, float 0x3FC2073EC0000000) #5, !dbg !40 + %.010.i65 = select i1 %.not1.i64, float %464, float %463, !dbg !40 + %465 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not2.i66 = icmp eq i32 %465, 0, !dbg !40 + %466 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i65, float %461, float 0xBFBF19B980000000) #5, !dbg !40 + %467 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i65, float %461, float 0xBFBF19B980000000) #5, !dbg !40 + %.011.i67 = select i1 %.not2.i66, float %467, float %466, !dbg !40 + %468 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not3.i68 = icmp eq i32 %468, 0, !dbg !40 + %469 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i67, float %461, float 0x3FC1E52AA0000000) #5, !dbg !40 + %470 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i67, float %461, float 0x3FC1E52AA0000000) #5, !dbg !40 + %.012.i69 = select i1 %.not3.i68, float %470, float %469, !dbg !40 + %471 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not4.i70 = icmp eq i32 %471, 0, !dbg !40 + %472 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i69, float %461, float 0xBFC55B1720000000) #5, !dbg !40 + %473 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i69, float %461, float 0xBFC55B1720000000) #5, !dbg !40 + %.09.i71 = select i1 %.not4.i70, float %473, float %472, !dbg !40 + %474 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not5.i72 = icmp eq i32 %474, 0, !dbg !40 + %475 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i71, float %461, float 0x3FC99DA160000000) #5, !dbg !40 + %476 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i71, float %461, float 0x3FC99DA160000000) #5, !dbg !40 + %.05.i73 = select i1 %.not5.i72, float %476, float %475, !dbg !40 + %477 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not6.i74 = icmp eq i32 %477, 0, !dbg !40 + %478 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i73, float %461, float 0xBFCFFFE440000000) #5, !dbg !40 + %479 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i73, float %461, float 0xBFCFFFE440000000) #5, !dbg !40 + %.01.i75 = select i1 %.not6.i74, float %479, float %478, !dbg !40 + %480 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not7.i76 = icmp eq i32 %480, 0, !dbg !40 + %481 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i75, float %461, float 0x3FD5554F00000000) #5, !dbg !40 + %482 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i75, float %461, float 0x3FD5554F00000000) #5, !dbg !40 + %.0.i77 = select i1 %.not7.i76, float %482, float %481, !dbg !40 + %483 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not8.i78 = icmp eq i32 %483, 0, !dbg !40 + %484 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i77, float %461, float -5.000000e-01) #5, !dbg !40 + %485 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i77, float %461, float -5.000000e-01) #5, !dbg !40 + %.07.i79 = select i1 %.not8.i78, float %485, float %484, !dbg !40 + %486 = fmul float %461, %.07.i79, !dbg !40 + %487 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not9.i80 = icmp eq i32 %487, 0, !dbg !40 + %488 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %486, float %461, float %461) #5, !dbg !40 + %489 = tail call float @llvm.nvvm.fma.rn.f(float %486, float %461, float %461) #5, !dbg !40 + %.06.i81 = select i1 %.not9.i80, float %489, float %488, !dbg !40 + %490 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not10.i82 = icmp eq i32 %490, 0, !dbg !40 + %491 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i63, float 0x3FE62E4300000000, float %.06.i81) #5, !dbg !40 + %492 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i63, float 0x3FE62E4300000000, float %.06.i81) #5, !dbg !40 + %.04.i83 = select i1 %.not10.i82, float %492, float %491, !dbg !40 + %493 = icmp ugt i32 %452, 2139095039, !dbg !40 + br i1 %493, label %__nv_fmaf_rn.exit.i.i86, label %__nv_logf.exit89, !dbg !40 + +__nv_fmaf_rn.exit.i.i86: ; preds = %__nv_logf.exit59 + %494 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not11.i87 = icmp eq i32 %494, 0, !dbg !40 + %495 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i60, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %496 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i60, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %.03.i88 = select i1 %.not11.i87, float %496, float %495, !dbg !40 + br label %__nv_logf.exit89, !dbg !40 + +__nv_logf.exit89: ; preds = %__nv_logf.exit59, %__nv_fmaf_rn.exit.i.i86 + %r.i.0.i84 = phi float [ %.03.i88, %__nv_fmaf_rn.exit.i.i86 ], [ %.04.i83, %__nv_logf.exit59 ], !dbg !40 + %497 = fcmp oeq float %.02.i60, 0.000000e+00, !dbg !40 + %r.i.1.i85 = select i1 %497, float 0xFFF0000000000000, float %r.i.0.i84, !dbg !40 + %498 = fcmp olt float %147, 0x3810000000000000, !dbg !40 + %499 = fmul float %147, 0x4160000000000000, !dbg !40 + %.02.i90 = select i1 %498, float %499, float %147, !dbg !40 + %i.i.0.i91 = select i1 %498, float -2.300000e+01, float 0.000000e+00, !dbg !40 + %500 = bitcast float %.02.i90 to i32, !dbg !40 + %501 = add i32 %500, -1059760811, !dbg !40 + %502 = and i32 %501, -8388608, !dbg !40 + %503 = sub i32 %500, %502, !dbg !40 + %504 = bitcast i32 %503 to float, !dbg !40 + %505 = sitofp i32 %502 to float, !dbg !40 + %506 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not.i92 = icmp eq i32 %506, 0, !dbg !40 + %507 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %505, float 0x3E80000000000000, float %i.i.0.i91) #5, !dbg !40 + %508 = tail call float @llvm.nvvm.fma.rn.f(float %505, float 0x3E80000000000000, float %i.i.0.i91) #5, !dbg !40 + %.08.i93 = select i1 %.not.i92, float %508, float %507, !dbg !40 + %509 = fadd float %504, -1.000000e+00, !dbg !40 + %510 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not1.i94 = icmp eq i32 %510, 0, !dbg !40 + %511 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %509, float 0x3FC2073EC0000000) #5, !dbg !40 + %512 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %509, float 0x3FC2073EC0000000) #5, !dbg !40 + %.010.i95 = select i1 %.not1.i94, float %512, float %511, !dbg !40 + %513 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not2.i96 = icmp eq i32 %513, 0, !dbg !40 + %514 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i95, float %509, float 0xBFBF19B980000000) #5, !dbg !40 + %515 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i95, float %509, float 0xBFBF19B980000000) #5, !dbg !40 + %.011.i97 = select i1 %.not2.i96, float %515, float %514, !dbg !40 + %516 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not3.i98 = icmp eq i32 %516, 0, !dbg !40 + %517 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i97, float %509, float 0x3FC1E52AA0000000) #5, !dbg !40 + %518 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i97, float %509, float 0x3FC1E52AA0000000) #5, !dbg !40 + %.012.i99 = select i1 %.not3.i98, float %518, float %517, !dbg !40 + %519 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not4.i100 = icmp eq i32 %519, 0, !dbg !40 + %520 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i99, float %509, float 0xBFC55B1720000000) #5, !dbg !40 + %521 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i99, float %509, float 0xBFC55B1720000000) #5, !dbg !40 + %.09.i101 = select i1 %.not4.i100, float %521, float %520, !dbg !40 + %522 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not5.i102 = icmp eq i32 %522, 0, !dbg !40 + %523 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i101, float %509, float 0x3FC99DA160000000) #5, !dbg !40 + %524 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i101, float %509, float 0x3FC99DA160000000) #5, !dbg !40 + %.05.i103 = select i1 %.not5.i102, float %524, float %523, !dbg !40 + %525 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not6.i104 = icmp eq i32 %525, 0, !dbg !40 + %526 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i103, float %509, float 0xBFCFFFE440000000) #5, !dbg !40 + %527 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i103, float %509, float 0xBFCFFFE440000000) #5, !dbg !40 + %.01.i105 = select i1 %.not6.i104, float %527, float %526, !dbg !40 + %528 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not7.i106 = icmp eq i32 %528, 0, !dbg !40 + %529 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i105, float %509, float 0x3FD5554F00000000) #5, !dbg !40 + %530 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i105, float %509, float 0x3FD5554F00000000) #5, !dbg !40 + %.0.i107 = select i1 %.not7.i106, float %530, float %529, !dbg !40 + %531 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not8.i108 = icmp eq i32 %531, 0, !dbg !40 + %532 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i107, float %509, float -5.000000e-01) #5, !dbg !40 + %533 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i107, float %509, float -5.000000e-01) #5, !dbg !40 + %.07.i109 = select i1 %.not8.i108, float %533, float %532, !dbg !40 + %534 = fmul float %509, %.07.i109, !dbg !40 + %535 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not9.i110 = icmp eq i32 %535, 0, !dbg !40 + %536 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %534, float %509, float %509) #5, !dbg !40 + %537 = tail call float @llvm.nvvm.fma.rn.f(float %534, float %509, float %509) #5, !dbg !40 + %.06.i111 = select i1 %.not9.i110, float %537, float %536, !dbg !40 + %538 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not10.i112 = icmp eq i32 %538, 0, !dbg !40 + %539 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i93, float 0x3FE62E4300000000, float %.06.i111) #5, !dbg !40 + %540 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i93, float 0x3FE62E4300000000, float %.06.i111) #5, !dbg !40 + %.04.i113 = select i1 %.not10.i112, float %540, float %539, !dbg !40 + %541 = icmp ugt i32 %500, 2139095039, !dbg !40 + br i1 %541, label %__nv_fmaf_rn.exit.i.i116, label %__nv_logf.exit119, !dbg !40 + +__nv_fmaf_rn.exit.i.i116: ; preds = %__nv_logf.exit89 + %542 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not11.i117 = icmp eq i32 %542, 0, !dbg !40 + %543 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i90, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %544 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i90, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %.03.i118 = select i1 %.not11.i117, float %544, float %543, !dbg !40 + br label %__nv_logf.exit119, !dbg !40 + +__nv_logf.exit119: ; preds = %__nv_logf.exit89, %__nv_fmaf_rn.exit.i.i116 + %r.i.0.i114 = phi float [ %.03.i118, %__nv_fmaf_rn.exit.i.i116 ], [ %.04.i113, %__nv_logf.exit89 ], !dbg !40 + %545 = fcmp oeq float %.02.i90, 0.000000e+00, !dbg !40 + %r.i.1.i115 = select i1 %545, float 0xFFF0000000000000, float %r.i.0.i114, !dbg !40 + %546 = fcmp olt float %153, 0x3810000000000000, !dbg !40 + %547 = fmul float %153, 0x4160000000000000, !dbg !40 + %.02.i120 = select i1 %546, float %547, float %153, !dbg !40 + %i.i.0.i121 = select i1 %546, float -2.300000e+01, float 0.000000e+00, !dbg !40 + %548 = bitcast float %.02.i120 to i32, !dbg !40 + %549 = add i32 %548, -1059760811, !dbg !40 + %550 = and i32 %549, -8388608, !dbg !40 + %551 = sub i32 %548, %550, !dbg !40 + %552 = bitcast i32 %551 to float, !dbg !40 + %553 = sitofp i32 %550 to float, !dbg !40 + %554 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not.i122 = icmp eq i32 %554, 0, !dbg !40 + %555 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %553, float 0x3E80000000000000, float %i.i.0.i121) #5, !dbg !40 + %556 = tail call float @llvm.nvvm.fma.rn.f(float %553, float 0x3E80000000000000, float %i.i.0.i121) #5, !dbg !40 + %.08.i123 = select i1 %.not.i122, float %556, float %555, !dbg !40 + %557 = fadd float %552, -1.000000e+00, !dbg !40 + %558 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not1.i124 = icmp eq i32 %558, 0, !dbg !40 + %559 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %557, float 0x3FC2073EC0000000) #5, !dbg !40 + %560 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %557, float 0x3FC2073EC0000000) #5, !dbg !40 + %.010.i125 = select i1 %.not1.i124, float %560, float %559, !dbg !40 + %561 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not2.i126 = icmp eq i32 %561, 0, !dbg !40 + %562 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i125, float %557, float 0xBFBF19B980000000) #5, !dbg !40 + %563 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i125, float %557, float 0xBFBF19B980000000) #5, !dbg !40 + %.011.i127 = select i1 %.not2.i126, float %563, float %562, !dbg !40 + %564 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not3.i128 = icmp eq i32 %564, 0, !dbg !40 + %565 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i127, float %557, float 0x3FC1E52AA0000000) #5, !dbg !40 + %566 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i127, float %557, float 0x3FC1E52AA0000000) #5, !dbg !40 + %.012.i129 = select i1 %.not3.i128, float %566, float %565, !dbg !40 + %567 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not4.i130 = icmp eq i32 %567, 0, !dbg !40 + %568 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i129, float %557, float 0xBFC55B1720000000) #5, !dbg !40 + %569 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i129, float %557, float 0xBFC55B1720000000) #5, !dbg !40 + %.09.i131 = select i1 %.not4.i130, float %569, float %568, !dbg !40 + %570 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not5.i132 = icmp eq i32 %570, 0, !dbg !40 + %571 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i131, float %557, float 0x3FC99DA160000000) #5, !dbg !40 + %572 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i131, float %557, float 0x3FC99DA160000000) #5, !dbg !40 + %.05.i133 = select i1 %.not5.i132, float %572, float %571, !dbg !40 + %573 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not6.i134 = icmp eq i32 %573, 0, !dbg !40 + %574 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i133, float %557, float 0xBFCFFFE440000000) #5, !dbg !40 + %575 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i133, float %557, float 0xBFCFFFE440000000) #5, !dbg !40 + %.01.i135 = select i1 %.not6.i134, float %575, float %574, !dbg !40 + %576 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not7.i136 = icmp eq i32 %576, 0, !dbg !40 + %577 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i135, float %557, float 0x3FD5554F00000000) #5, !dbg !40 + %578 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i135, float %557, float 0x3FD5554F00000000) #5, !dbg !40 + %.0.i137 = select i1 %.not7.i136, float %578, float %577, !dbg !40 + %579 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not8.i138 = icmp eq i32 %579, 0, !dbg !40 + %580 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i137, float %557, float -5.000000e-01) #5, !dbg !40 + %581 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i137, float %557, float -5.000000e-01) #5, !dbg !40 + %.07.i139 = select i1 %.not8.i138, float %581, float %580, !dbg !40 + %582 = fmul float %557, %.07.i139, !dbg !40 + %583 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not9.i140 = icmp eq i32 %583, 0, !dbg !40 + %584 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %582, float %557, float %557) #5, !dbg !40 + %585 = tail call float @llvm.nvvm.fma.rn.f(float %582, float %557, float %557) #5, !dbg !40 + %.06.i141 = select i1 %.not9.i140, float %585, float %584, !dbg !40 + %586 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not10.i142 = icmp eq i32 %586, 0, !dbg !40 + %587 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i123, float 0x3FE62E4300000000, float %.06.i141) #5, !dbg !40 + %588 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i123, float 0x3FE62E4300000000, float %.06.i141) #5, !dbg !40 + %.04.i143 = select i1 %.not10.i142, float %588, float %587, !dbg !40 + %589 = icmp ugt i32 %548, 2139095039, !dbg !40 + br i1 %589, label %__nv_fmaf_rn.exit.i.i146, label %__nv_logf.exit149, !dbg !40 + +__nv_fmaf_rn.exit.i.i146: ; preds = %__nv_logf.exit119 + %590 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not11.i147 = icmp eq i32 %590, 0, !dbg !40 + %591 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i120, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %592 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i120, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %.03.i148 = select i1 %.not11.i147, float %592, float %591, !dbg !40 + br label %__nv_logf.exit149, !dbg !40 + +__nv_logf.exit149: ; preds = %__nv_logf.exit119, %__nv_fmaf_rn.exit.i.i146 + %r.i.0.i144 = phi float [ %.03.i148, %__nv_fmaf_rn.exit.i.i146 ], [ %.04.i143, %__nv_logf.exit119 ], !dbg !40 + %593 = fcmp oeq float %.02.i120, 0.000000e+00, !dbg !40 + %r.i.1.i145 = select i1 %593, float 0xFFF0000000000000, float %r.i.0.i144, !dbg !40 + %594 = fcmp olt float %154, 0x3810000000000000, !dbg !40 + %595 = fmul float %154, 0x4160000000000000, !dbg !40 + %.02.i150 = select i1 %594, float %595, float %154, !dbg !40 + %i.i.0.i151 = select i1 %594, float -2.300000e+01, float 0.000000e+00, !dbg !40 + %596 = bitcast float %.02.i150 to i32, !dbg !40 + %597 = add i32 %596, -1059760811, !dbg !40 + %598 = and i32 %597, -8388608, !dbg !40 + %599 = sub i32 %596, %598, !dbg !40 + %600 = bitcast i32 %599 to float, !dbg !40 + %601 = sitofp i32 %598 to float, !dbg !40 + %602 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not.i152 = icmp eq i32 %602, 0, !dbg !40 + %603 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %601, float 0x3E80000000000000, float %i.i.0.i151) #5, !dbg !40 + %604 = tail call float @llvm.nvvm.fma.rn.f(float %601, float 0x3E80000000000000, float %i.i.0.i151) #5, !dbg !40 + %.08.i153 = select i1 %.not.i152, float %604, float %603, !dbg !40 + %605 = fadd float %600, -1.000000e+00, !dbg !40 + %606 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not1.i154 = icmp eq i32 %606, 0, !dbg !40 + %607 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %605, float 0x3FC2073EC0000000) #5, !dbg !40 + %608 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %605, float 0x3FC2073EC0000000) #5, !dbg !40 + %.010.i155 = select i1 %.not1.i154, float %608, float %607, !dbg !40 + %609 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not2.i156 = icmp eq i32 %609, 0, !dbg !40 + %610 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i155, float %605, float 0xBFBF19B980000000) #5, !dbg !40 + %611 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i155, float %605, float 0xBFBF19B980000000) #5, !dbg !40 + %.011.i157 = select i1 %.not2.i156, float %611, float %610, !dbg !40 + %612 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not3.i158 = icmp eq i32 %612, 0, !dbg !40 + %613 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i157, float %605, float 0x3FC1E52AA0000000) #5, !dbg !40 + %614 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i157, float %605, float 0x3FC1E52AA0000000) #5, !dbg !40 + %.012.i159 = select i1 %.not3.i158, float %614, float %613, !dbg !40 + %615 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not4.i160 = icmp eq i32 %615, 0, !dbg !40 + %616 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i159, float %605, float 0xBFC55B1720000000) #5, !dbg !40 + %617 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i159, float %605, float 0xBFC55B1720000000) #5, !dbg !40 + %.09.i161 = select i1 %.not4.i160, float %617, float %616, !dbg !40 + %618 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not5.i162 = icmp eq i32 %618, 0, !dbg !40 + %619 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i161, float %605, float 0x3FC99DA160000000) #5, !dbg !40 + %620 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i161, float %605, float 0x3FC99DA160000000) #5, !dbg !40 + %.05.i163 = select i1 %.not5.i162, float %620, float %619, !dbg !40 + %621 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not6.i164 = icmp eq i32 %621, 0, !dbg !40 + %622 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i163, float %605, float 0xBFCFFFE440000000) #5, !dbg !40 + %623 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i163, float %605, float 0xBFCFFFE440000000) #5, !dbg !40 + %.01.i165 = select i1 %.not6.i164, float %623, float %622, !dbg !40 + %624 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not7.i166 = icmp eq i32 %624, 0, !dbg !40 + %625 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i165, float %605, float 0x3FD5554F00000000) #5, !dbg !40 + %626 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i165, float %605, float 0x3FD5554F00000000) #5, !dbg !40 + %.0.i167 = select i1 %.not7.i166, float %626, float %625, !dbg !40 + %627 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not8.i168 = icmp eq i32 %627, 0, !dbg !40 + %628 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i167, float %605, float -5.000000e-01) #5, !dbg !40 + %629 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i167, float %605, float -5.000000e-01) #5, !dbg !40 + %.07.i169 = select i1 %.not8.i168, float %629, float %628, !dbg !40 + %630 = fmul float %605, %.07.i169, !dbg !40 + %631 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not9.i170 = icmp eq i32 %631, 0, !dbg !40 + %632 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %630, float %605, float %605) #5, !dbg !40 + %633 = tail call float @llvm.nvvm.fma.rn.f(float %630, float %605, float %605) #5, !dbg !40 + %.06.i171 = select i1 %.not9.i170, float %633, float %632, !dbg !40 + %634 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not10.i172 = icmp eq i32 %634, 0, !dbg !40 + %635 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i153, float 0x3FE62E4300000000, float %.06.i171) #5, !dbg !40 + %636 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i153, float 0x3FE62E4300000000, float %.06.i171) #5, !dbg !40 + %.04.i173 = select i1 %.not10.i172, float %636, float %635, !dbg !40 + %637 = icmp ugt i32 %596, 2139095039, !dbg !40 + br i1 %637, label %__nv_fmaf_rn.exit.i.i176, label %__nv_logf.exit179, !dbg !40 + +__nv_fmaf_rn.exit.i.i176: ; preds = %__nv_logf.exit149 + %638 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not11.i177 = icmp eq i32 %638, 0, !dbg !40 + %639 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i150, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %640 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i150, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %.03.i178 = select i1 %.not11.i177, float %640, float %639, !dbg !40 + br label %__nv_logf.exit179, !dbg !40 + +__nv_logf.exit179: ; preds = %__nv_logf.exit149, %__nv_fmaf_rn.exit.i.i176 + %r.i.0.i174 = phi float [ %.03.i178, %__nv_fmaf_rn.exit.i.i176 ], [ %.04.i173, %__nv_logf.exit149 ], !dbg !40 + %641 = fcmp oeq float %.02.i150, 0.000000e+00, !dbg !40 + %r.i.1.i175 = select i1 %641, float 0xFFF0000000000000, float %r.i.0.i174, !dbg !40 + %642 = fcmp olt float %155, 0x3810000000000000, !dbg !40 + %643 = fmul float %155, 0x4160000000000000, !dbg !40 + %.02.i180 = select i1 %642, float %643, float %155, !dbg !40 + %i.i.0.i181 = select i1 %642, float -2.300000e+01, float 0.000000e+00, !dbg !40 + %644 = bitcast float %.02.i180 to i32, !dbg !40 + %645 = add i32 %644, -1059760811, !dbg !40 + %646 = and i32 %645, -8388608, !dbg !40 + %647 = sub i32 %644, %646, !dbg !40 + %648 = bitcast i32 %647 to float, !dbg !40 + %649 = sitofp i32 %646 to float, !dbg !40 + %650 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not.i182 = icmp eq i32 %650, 0, !dbg !40 + %651 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %649, float 0x3E80000000000000, float %i.i.0.i181) #5, !dbg !40 + %652 = tail call float @llvm.nvvm.fma.rn.f(float %649, float 0x3E80000000000000, float %i.i.0.i181) #5, !dbg !40 + %.08.i183 = select i1 %.not.i182, float %652, float %651, !dbg !40 + %653 = fadd float %648, -1.000000e+00, !dbg !40 + %654 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not1.i184 = icmp eq i32 %654, 0, !dbg !40 + %655 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %653, float 0x3FC2073EC0000000) #5, !dbg !40 + %656 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %653, float 0x3FC2073EC0000000) #5, !dbg !40 + %.010.i185 = select i1 %.not1.i184, float %656, float %655, !dbg !40 + %657 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not2.i186 = icmp eq i32 %657, 0, !dbg !40 + %658 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i185, float %653, float 0xBFBF19B980000000) #5, !dbg !40 + %659 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i185, float %653, float 0xBFBF19B980000000) #5, !dbg !40 + %.011.i187 = select i1 %.not2.i186, float %659, float %658, !dbg !40 + %660 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not3.i188 = icmp eq i32 %660, 0, !dbg !40 + %661 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i187, float %653, float 0x3FC1E52AA0000000) #5, !dbg !40 + %662 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i187, float %653, float 0x3FC1E52AA0000000) #5, !dbg !40 + %.012.i189 = select i1 %.not3.i188, float %662, float %661, !dbg !40 + %663 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not4.i190 = icmp eq i32 %663, 0, !dbg !40 + %664 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i189, float %653, float 0xBFC55B1720000000) #5, !dbg !40 + %665 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i189, float %653, float 0xBFC55B1720000000) #5, !dbg !40 + %.09.i191 = select i1 %.not4.i190, float %665, float %664, !dbg !40 + %666 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not5.i192 = icmp eq i32 %666, 0, !dbg !40 + %667 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i191, float %653, float 0x3FC99DA160000000) #5, !dbg !40 + %668 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i191, float %653, float 0x3FC99DA160000000) #5, !dbg !40 + %.05.i193 = select i1 %.not5.i192, float %668, float %667, !dbg !40 + %669 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not6.i194 = icmp eq i32 %669, 0, !dbg !40 + %670 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i193, float %653, float 0xBFCFFFE440000000) #5, !dbg !40 + %671 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i193, float %653, float 0xBFCFFFE440000000) #5, !dbg !40 + %.01.i195 = select i1 %.not6.i194, float %671, float %670, !dbg !40 + %672 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not7.i196 = icmp eq i32 %672, 0, !dbg !40 + %673 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i195, float %653, float 0x3FD5554F00000000) #5, !dbg !40 + %674 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i195, float %653, float 0x3FD5554F00000000) #5, !dbg !40 + %.0.i197 = select i1 %.not7.i196, float %674, float %673, !dbg !40 + %675 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not8.i198 = icmp eq i32 %675, 0, !dbg !40 + %676 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i197, float %653, float -5.000000e-01) #5, !dbg !40 + %677 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i197, float %653, float -5.000000e-01) #5, !dbg !40 + %.07.i199 = select i1 %.not8.i198, float %677, float %676, !dbg !40 + %678 = fmul float %653, %.07.i199, !dbg !40 + %679 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not9.i200 = icmp eq i32 %679, 0, !dbg !40 + %680 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %678, float %653, float %653) #5, !dbg !40 + %681 = tail call float @llvm.nvvm.fma.rn.f(float %678, float %653, float %653) #5, !dbg !40 + %.06.i201 = select i1 %.not9.i200, float %681, float %680, !dbg !40 + %682 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not10.i202 = icmp eq i32 %682, 0, !dbg !40 + %683 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i183, float 0x3FE62E4300000000, float %.06.i201) #5, !dbg !40 + %684 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i183, float 0x3FE62E4300000000, float %.06.i201) #5, !dbg !40 + %.04.i203 = select i1 %.not10.i202, float %684, float %683, !dbg !40 + %685 = icmp ugt i32 %644, 2139095039, !dbg !40 + br i1 %685, label %__nv_fmaf_rn.exit.i.i206, label %__nv_logf.exit209, !dbg !40 + +__nv_fmaf_rn.exit.i.i206: ; preds = %__nv_logf.exit179 + %686 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not11.i207 = icmp eq i32 %686, 0, !dbg !40 + %687 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i180, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %688 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i180, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %.03.i208 = select i1 %.not11.i207, float %688, float %687, !dbg !40 + br label %__nv_logf.exit209, !dbg !40 + +__nv_logf.exit209: ; preds = %__nv_logf.exit179, %__nv_fmaf_rn.exit.i.i206 + %r.i.0.i204 = phi float [ %.03.i208, %__nv_fmaf_rn.exit.i.i206 ], [ %.04.i203, %__nv_logf.exit179 ], !dbg !40 + %689 = fcmp oeq float %.02.i180, 0.000000e+00, !dbg !40 + %r.i.1.i205 = select i1 %689, float 0xFFF0000000000000, float %r.i.0.i204, !dbg !40 + %690 = fcmp olt float %156, 0x3810000000000000, !dbg !40 + %691 = fmul float %156, 0x4160000000000000, !dbg !40 + %.02.i210 = select i1 %690, float %691, float %156, !dbg !40 + %i.i.0.i211 = select i1 %690, float -2.300000e+01, float 0.000000e+00, !dbg !40 + %692 = bitcast float %.02.i210 to i32, !dbg !40 + %693 = add i32 %692, -1059760811, !dbg !40 + %694 = and i32 %693, -8388608, !dbg !40 + %695 = sub i32 %692, %694, !dbg !40 + %696 = bitcast i32 %695 to float, !dbg !40 + %697 = sitofp i32 %694 to float, !dbg !40 + %698 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not.i212 = icmp eq i32 %698, 0, !dbg !40 + %699 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %697, float 0x3E80000000000000, float %i.i.0.i211) #5, !dbg !40 + %700 = tail call float @llvm.nvvm.fma.rn.f(float %697, float 0x3E80000000000000, float %i.i.0.i211) #5, !dbg !40 + %.08.i213 = select i1 %.not.i212, float %700, float %699, !dbg !40 + %701 = fadd float %696, -1.000000e+00, !dbg !40 + %702 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not1.i214 = icmp eq i32 %702, 0, !dbg !40 + %703 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %701, float 0x3FC2073EC0000000) #5, !dbg !40 + %704 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %701, float 0x3FC2073EC0000000) #5, !dbg !40 + %.010.i215 = select i1 %.not1.i214, float %704, float %703, !dbg !40 + %705 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not2.i216 = icmp eq i32 %705, 0, !dbg !40 + %706 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010.i215, float %701, float 0xBFBF19B980000000) #5, !dbg !40 + %707 = tail call float @llvm.nvvm.fma.rn.f(float %.010.i215, float %701, float 0xBFBF19B980000000) #5, !dbg !40 + %.011.i217 = select i1 %.not2.i216, float %707, float %706, !dbg !40 + %708 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not3.i218 = icmp eq i32 %708, 0, !dbg !40 + %709 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011.i217, float %701, float 0x3FC1E52AA0000000) #5, !dbg !40 + %710 = tail call float @llvm.nvvm.fma.rn.f(float %.011.i217, float %701, float 0x3FC1E52AA0000000) #5, !dbg !40 + %.012.i219 = select i1 %.not3.i218, float %710, float %709, !dbg !40 + %711 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not4.i220 = icmp eq i32 %711, 0, !dbg !40 + %712 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012.i219, float %701, float 0xBFC55B1720000000) #5, !dbg !40 + %713 = tail call float @llvm.nvvm.fma.rn.f(float %.012.i219, float %701, float 0xBFC55B1720000000) #5, !dbg !40 + %.09.i221 = select i1 %.not4.i220, float %713, float %712, !dbg !40 + %714 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not5.i222 = icmp eq i32 %714, 0, !dbg !40 + %715 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09.i221, float %701, float 0x3FC99DA160000000) #5, !dbg !40 + %716 = tail call float @llvm.nvvm.fma.rn.f(float %.09.i221, float %701, float 0x3FC99DA160000000) #5, !dbg !40 + %.05.i223 = select i1 %.not5.i222, float %716, float %715, !dbg !40 + %717 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not6.i224 = icmp eq i32 %717, 0, !dbg !40 + %718 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i223, float %701, float 0xBFCFFFE440000000) #5, !dbg !40 + %719 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i223, float %701, float 0xBFCFFFE440000000) #5, !dbg !40 + %.01.i225 = select i1 %.not6.i224, float %719, float %718, !dbg !40 + %720 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not7.i226 = icmp eq i32 %720, 0, !dbg !40 + %721 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01.i225, float %701, float 0x3FD5554F00000000) #5, !dbg !40 + %722 = tail call float @llvm.nvvm.fma.rn.f(float %.01.i225, float %701, float 0x3FD5554F00000000) #5, !dbg !40 + %.0.i227 = select i1 %.not7.i226, float %722, float %721, !dbg !40 + %723 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not8.i228 = icmp eq i32 %723, 0, !dbg !40 + %724 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0.i227, float %701, float -5.000000e-01) #5, !dbg !40 + %725 = tail call float @llvm.nvvm.fma.rn.f(float %.0.i227, float %701, float -5.000000e-01) #5, !dbg !40 + %.07.i229 = select i1 %.not8.i228, float %725, float %724, !dbg !40 + %726 = fmul float %701, %.07.i229, !dbg !40 + %727 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not9.i230 = icmp eq i32 %727, 0, !dbg !40 + %728 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %726, float %701, float %701) #5, !dbg !40 + %729 = tail call float @llvm.nvvm.fma.rn.f(float %726, float %701, float %701) #5, !dbg !40 + %.06.i231 = select i1 %.not9.i230, float %729, float %728, !dbg !40 + %730 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not10.i232 = icmp eq i32 %730, 0, !dbg !40 + %731 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08.i213, float 0x3FE62E4300000000, float %.06.i231) #5, !dbg !40 + %732 = tail call float @llvm.nvvm.fma.rn.f(float %.08.i213, float 0x3FE62E4300000000, float %.06.i231) #5, !dbg !40 + %.04.i233 = select i1 %.not10.i232, float %732, float %731, !dbg !40 + %733 = icmp ugt i32 %692, 2139095039, !dbg !40 + br i1 %733, label %__nv_fmaf_rn.exit.i.i236, label %__nv_logf.exit239, !dbg !40 + +__nv_fmaf_rn.exit.i.i236: ; preds = %__nv_logf.exit209 + %734 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5, !dbg !40 + %.not11.i237 = icmp eq i32 %734, 0, !dbg !40 + %735 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i210, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %736 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i210, float 0x7FF0000000000000, float 0x7FF0000000000000) #5, !dbg !40 + %.03.i238 = select i1 %.not11.i237, float %736, float %735, !dbg !40 + br label %__nv_logf.exit239, !dbg !40 + +__nv_logf.exit239: ; preds = %__nv_logf.exit209, %__nv_fmaf_rn.exit.i.i236 + %r.i.0.i234 = phi float [ %.03.i238, %__nv_fmaf_rn.exit.i.i236 ], [ %.04.i233, %__nv_logf.exit209 ], !dbg !40 + %737 = fcmp oeq float %.02.i210, 0.000000e+00, !dbg !40 + %r.i.1.i235 = select i1 %737, float 0xFFF0000000000000, float %r.i.0.i234, !dbg !40 + tail call void @llvm.nvvm.barrier0(), !dbg !40 + %738 = insertelement <1 x float> undef, float %r.i.1.i, i64 0, !dbg !40 + store <1 x float> %738, ptr addrspace(3) %25, align 4, !dbg !40 + %739 = insertelement <1 x float> undef, float %r.i.1.i55, i64 0, !dbg !40 + store <1 x float> %739, ptr addrspace(3) %28, align 4, !dbg !40 + %740 = insertelement <1 x float> undef, float %r.i.1.i85, i64 0, !dbg !40 + store <1 x float> %740, ptr addrspace(3) %31, align 4, !dbg !40 + %741 = insertelement <1 x float> undef, float %r.i.1.i115, i64 0, !dbg !40 + store <1 x float> %741, ptr addrspace(3) %34, align 4, !dbg !40 + tail call void @llvm.nvvm.barrier0(), !dbg !40 + %742 = load float, ptr addrspace(3) %37, align 4, !dbg !40 + %743 = load float, ptr addrspace(3) %40, align 4, !dbg !40 + %744 = load float, ptr addrspace(3) %43, align 4, !dbg !40 + %745 = load float, ptr addrspace(3) %46, align 4, !dbg !40 + tail call void @llvm.nvvm.barrier0(), !dbg !40 + %746 = insertelement <1 x float> undef, float %r.i.1.i145, i64 0, !dbg !40 + store <1 x float> %746, ptr addrspace(3) %25, align 4, !dbg !40 + %747 = insertelement <1 x float> undef, float %r.i.1.i175, i64 0, !dbg !40 + store <1 x float> %747, ptr addrspace(3) %28, align 4, !dbg !40 + %748 = insertelement <1 x float> undef, float %r.i.1.i205, i64 0, !dbg !40 + store <1 x float> %748, ptr addrspace(3) %31, align 4, !dbg !40 + %749 = insertelement <1 x float> undef, float %r.i.1.i235, i64 0, !dbg !40 + store <1 x float> %749, ptr addrspace(3) %34, align 4, !dbg !40 + tail call void @llvm.nvvm.barrier0(), !dbg !40 + %750 = load float, ptr addrspace(3) %37, align 4, !dbg !40 + %751 = load float, ptr addrspace(3) %40, align 4, !dbg !40 + %752 = load float, ptr addrspace(3) %43, align 4, !dbg !40 + %753 = load float, ptr addrspace(3) %46, align 4, !dbg !40 + %754 = insertelement <8 x float> poison, float %742, i64 0, !dbg !41 + %755 = insertelement <8 x float> %754, float %743, i64 1, !dbg !41 + %756 = insertelement <8 x float> %755, float %744, i64 2, !dbg !41 + %757 = insertelement <8 x float> %756, float %745, i64 3, !dbg !41 + %758 = insertelement <8 x float> %757, float %750, i64 4, !dbg !41 + %759 = insertelement <8 x float> %758, float %751, i64 5, !dbg !41 + %760 = insertelement <8 x float> %759, float %752, i64 6, !dbg !41 + %761 = insertelement <8 x float> %760, float %753, i64 7, !dbg !41 + %762 = fsub <8 x float> %761, %353, !dbg !41 + %763 = fadd <8 x float> %762, zeroinitializer, !dbg !41 + %764 = select <8 x i1> %206, <8 x float> zeroinitializer, <8 x float> %763, !dbg !42 + %765 = insertelement <8 x i1> %73, i1 %104, i64 1, !dbg !16 + %766 = shufflevector <8 x i1> %765, <8 x i1> poison, <8 x i32> , !dbg !16 + %767 = select <8 x i1> %766, <8 x float> %764, <8 x float> , !dbg !16 + %768 = fadd <8 x float> %76, %767, !dbg !16 + %769 = insertelement <8 x i1> %73, i1 %103, i64 1, !dbg !43 + %770 = shufflevector <8 x i1> %769, <8 x i1> poison, <8 x i32> , !dbg !43 + %771 = select <8 x i1> %770, <8 x i1> %165, <8 x i1> zeroinitializer, !dbg !43 + %772 = zext <8 x i1> %771 to <8 x i64>, !dbg !43 + %773 = add <8 x i64> %77, %772, !dbg !43 + %774 = add nuw nsw i32 %75, 2048, !dbg !17 + %775 = icmp ult i32 %75, 5632, !dbg !17 + br i1 %775, label %74, label %776, !dbg !17 + +776: ; preds = %__nv_logf.exit239 + %777 = lshr i32 %9, 5, !dbg !10 + %778 = and i32 %777, 7, !dbg !10 + %779 = and i32 %9, 31, !dbg !10 + tail call void @llvm.nvvm.barrier0(), !dbg !44 + %shift = shufflevector <8 x float> %768, <8 x float> poison, <8 x i32> , !dbg !48 + %780 = fadd <8 x float> %768, %shift, !dbg !48 + %shift286 = shufflevector <8 x float> %768, <8 x float> poison, <8 x i32> , !dbg !48 + %781 = fadd <8 x float> %shift286, %780, !dbg !48 + %shift287 = shufflevector <8 x float> %768, <8 x float> poison, <8 x i32> , !dbg !48 + %782 = fadd <8 x float> %shift287, %781, !dbg !48 + %shift288 = shufflevector <8 x float> %768, <8 x float> poison, <8 x i32> , !dbg !48 + %783 = fadd <8 x float> %shift288, %782, !dbg !48 + %shift289 = shufflevector <8 x float> %768, <8 x float> poison, <8 x i32> , !dbg !48 + %784 = fadd <8 x float> %shift289, %783, !dbg !48 + %shift290 = shufflevector <8 x float> %768, <8 x float> poison, <8 x i32> , !dbg !48 + %785 = fadd <8 x float> %shift290, %784, !dbg !48 + %shift291 = shufflevector <8 x float> %768, <8 x float> poison, <8 x i32> , !dbg !48 + %786 = fadd <8 x float> %shift291, %785, !dbg !48 + %787 = extractelement <8 x float> %786, i64 0, !dbg !48 + %788 = bitcast float %787 to i32, !dbg !44 + %789 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %788, i32 16, i32 31), !dbg !44 + %790 = bitcast i32 %789 to float, !dbg !44 + %791 = fadd float %787, %790, !dbg !48 + %792 = bitcast float %791 to i32, !dbg !44 + %793 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %792, i32 8, i32 31), !dbg !44 + %794 = bitcast i32 %793 to float, !dbg !44 + %795 = fadd float %791, %794, !dbg !48 + %796 = bitcast float %795 to i32, !dbg !44 + %797 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %796, i32 4, i32 31), !dbg !44 + %798 = bitcast i32 %797 to float, !dbg !44 + %799 = fadd float %795, %798, !dbg !48 + %800 = bitcast float %799 to i32, !dbg !44 + %801 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %800, i32 2, i32 31), !dbg !44 + %802 = bitcast i32 %801 to float, !dbg !44 + %803 = fadd float %799, %802, !dbg !48 + %804 = bitcast float %803 to i32, !dbg !44 + %805 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %804, i32 1, i32 31), !dbg !44 + %806 = bitcast i32 %805 to float, !dbg !44 + %807 = fadd float %803, %806, !dbg !48 + %808 = icmp eq i32 %779, 0, !dbg !44 + %809 = zext nneg i32 %778 to i64, !dbg !44 + %810 = getelementptr float, ptr addrspace(3) @global_smem, i64 %809, !dbg !44 + tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %810, float %807, i1 %808) #5, !dbg !44 + tail call void @llvm.nvvm.barrier0(), !dbg !44 + %811 = icmp slt i32 %9, 8, !dbg !44 + %812 = sext i32 %9 to i64, !dbg !44 + %813 = getelementptr float, ptr addrspace(3) @global_smem, i64 %812, !dbg !44 + %814 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %813, i1 %811) #5, !dbg !44 + %815 = bitcast float %814 to i32, !dbg !44 + %816 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %815, i32 4, i32 31), !dbg !44 + %817 = bitcast i32 %816 to float, !dbg !44 + %818 = fadd float %814, %817, !dbg !48 + %819 = bitcast float %818 to i32, !dbg !44 + %820 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %819, i32 2, i32 31), !dbg !44 + %821 = bitcast i32 %820 to float, !dbg !44 + %822 = fadd float %818, %821, !dbg !48 + %823 = bitcast float %822 to i32, !dbg !44 + %824 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %823, i32 1, i32 31), !dbg !44 + %825 = bitcast i32 %824 to float, !dbg !44 + %826 = fadd float %822, %825, !dbg !48 + %827 = and i32 %9, 7, !dbg !44 + %828 = icmp eq i32 %827, 0, !dbg !44 + %829 = and i1 %811, %828, !dbg !44 + tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %813, float %826, i1 %829) #5, !dbg !44 + tail call void @llvm.nvvm.barrier0(), !dbg !44 + %830 = load i32, ptr addrspace(3) @global_smem, align 4, !dbg !44 + %831 = getelementptr float, ptr addrspace(1) %4, i64 %18, !dbg !52 + %832 = icmp eq i32 %urem, 0, !dbg !53 + %833 = and i1 %832, %19, !dbg !53 + tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %830, ptr addrspace(1) %831, i1 %833) #5, !dbg !53 + tail call void @llvm.nvvm.barrier0(), !dbg !54 + %shift292 = shufflevector <8 x i64> %773, <8 x i64> poison, <8 x i32> , !dbg !56 + %834 = add <8 x i64> %773, %shift292, !dbg !56 + %shift293 = shufflevector <8 x i64> %773, <8 x i64> poison, <8 x i32> , !dbg !56 + %835 = add <8 x i64> %834, %shift293, !dbg !56 + %shift294 = shufflevector <8 x i64> %773, <8 x i64> poison, <8 x i32> , !dbg !56 + %836 = add <8 x i64> %835, %shift294, !dbg !56 + %shift295 = shufflevector <8 x i64> %773, <8 x i64> poison, <8 x i32> , !dbg !56 + %837 = add <8 x i64> %836, %shift295, !dbg !56 + %shift296 = shufflevector <8 x i64> %773, <8 x i64> poison, <8 x i32> , !dbg !56 + %838 = add <8 x i64> %837, %shift296, !dbg !56 + %shift297 = shufflevector <8 x i64> %773, <8 x i64> poison, <8 x i32> , !dbg !56 + %839 = add <8 x i64> %838, %shift297, !dbg !56 + %shift298 = shufflevector <8 x i64> %773, <8 x i64> poison, <8 x i32> , !dbg !56 + %840 = add <8 x i64> %839, %shift298, !dbg !56 + %841 = extractelement <8 x i64> %840, i64 0, !dbg !56 + %842 = trunc i64 %841 to i32, !dbg !54 + %843 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %842, i32 16, i32 31), !dbg !54 + %bc = bitcast i64 %841 to <2 x i32>, !dbg !54 + %844 = extractelement <2 x i32> %bc, i64 1, !dbg !54 + %845 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %844, i32 16, i32 31), !dbg !54 + %846 = insertelement <2 x i32> undef, i32 %843, i64 0, !dbg !54 + %847 = insertelement <2 x i32> %846, i32 %845, i64 1, !dbg !54 + %848 = bitcast <2 x i32> %847 to i64, !dbg !54 + %849 = add i64 %841, %848, !dbg !56 + %850 = trunc i64 %849 to i32, !dbg !54 + %851 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %850, i32 8, i32 31), !dbg !54 + %bc1 = bitcast i64 %849 to <2 x i32>, !dbg !54 + %852 = extractelement <2 x i32> %bc1, i64 1, !dbg !54 + %853 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %852, i32 8, i32 31), !dbg !54 + %854 = insertelement <2 x i32> undef, i32 %851, i64 0, !dbg !54 + %855 = insertelement <2 x i32> %854, i32 %853, i64 1, !dbg !54 + %856 = bitcast <2 x i32> %855 to i64, !dbg !54 + %857 = add i64 %849, %856, !dbg !56 + %858 = trunc i64 %857 to i32, !dbg !54 + %859 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %858, i32 4, i32 31), !dbg !54 + %bc2 = bitcast i64 %857 to <2 x i32>, !dbg !54 + %860 = extractelement <2 x i32> %bc2, i64 1, !dbg !54 + %861 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %860, i32 4, i32 31), !dbg !54 + %862 = insertelement <2 x i32> undef, i32 %859, i64 0, !dbg !54 + %863 = insertelement <2 x i32> %862, i32 %861, i64 1, !dbg !54 + %864 = bitcast <2 x i32> %863 to i64, !dbg !54 + %865 = add i64 %857, %864, !dbg !56 + %866 = trunc i64 %865 to i32, !dbg !54 + %867 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %866, i32 2, i32 31), !dbg !54 + %bc3 = bitcast i64 %865 to <2 x i32>, !dbg !54 + %868 = extractelement <2 x i32> %bc3, i64 1, !dbg !54 + %869 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %868, i32 2, i32 31), !dbg !54 + %870 = insertelement <2 x i32> undef, i32 %867, i64 0, !dbg !54 + %871 = insertelement <2 x i32> %870, i32 %869, i64 1, !dbg !54 + %872 = bitcast <2 x i32> %871 to i64, !dbg !54 + %873 = add i64 %865, %872, !dbg !56 + %874 = trunc i64 %873 to i32, !dbg !54 + %875 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %874, i32 1, i32 31), !dbg !54 + %bc4 = bitcast i64 %873 to <2 x i32>, !dbg !54 + %876 = extractelement <2 x i32> %bc4, i64 1, !dbg !54 + %877 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %876, i32 1, i32 31), !dbg !54 + %878 = insertelement <2 x i32> undef, i32 %875, i64 0, !dbg !54 + %879 = insertelement <2 x i32> %878, i32 %877, i64 1, !dbg !54 + %880 = bitcast <2 x i32> %879 to i64, !dbg !54 + %881 = add i64 %873, %880, !dbg !56 + %882 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %809, !dbg !54 + tail call void asm sideeffect "@$2 st.shared.b64 [ $0 + 0 ], $1;", "r,l,b"(ptr addrspace(3) %882, i64 %881, i1 %808) #5, !dbg !54 + tail call void @llvm.nvvm.barrier0(), !dbg !54 + %883 = getelementptr i64, ptr addrspace(3) @global_smem, i64 %812, !dbg !54 + %884 = tail call i64 asm sideeffect "@$2 ld.shared.b64 $0, [ $1 + 0 ];", "=l,r,b"(ptr addrspace(3) %883, i1 %811) #5, !dbg !54 + %885 = trunc i64 %884 to i32, !dbg !54 + %886 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %885, i32 4, i32 31), !dbg !54 + %bc5 = bitcast i64 %884 to <2 x i32>, !dbg !54 + %887 = extractelement <2 x i32> %bc5, i64 1, !dbg !54 + %888 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %887, i32 4, i32 31), !dbg !54 + %889 = insertelement <2 x i32> undef, i32 %886, i64 0, !dbg !54 + %890 = insertelement <2 x i32> %889, i32 %888, i64 1, !dbg !54 + %891 = bitcast <2 x i32> %890 to i64, !dbg !54 + %892 = add i64 %884, %891, !dbg !56 + %893 = trunc i64 %892 to i32, !dbg !54 + %894 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %893, i32 2, i32 31), !dbg !54 + %bc6 = bitcast i64 %892 to <2 x i32>, !dbg !54 + %895 = extractelement <2 x i32> %bc6, i64 1, !dbg !54 + %896 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %895, i32 2, i32 31), !dbg !54 + %897 = insertelement <2 x i32> undef, i32 %894, i64 0, !dbg !54 + %898 = insertelement <2 x i32> %897, i32 %896, i64 1, !dbg !54 + %899 = bitcast <2 x i32> %898 to i64, !dbg !54 + %900 = add i64 %892, %899, !dbg !56 + %901 = trunc i64 %900 to i32, !dbg !54 + %902 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %901, i32 1, i32 31), !dbg !54 + %bc7 = bitcast i64 %900 to <2 x i32>, !dbg !54 + %903 = extractelement <2 x i32> %bc7, i64 1, !dbg !54 + %904 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %903, i32 1, i32 31), !dbg !54 + %905 = insertelement <2 x i32> undef, i32 %902, i64 0, !dbg !54 + %906 = insertelement <2 x i32> %905, i32 %904, i64 1, !dbg !54 + %907 = bitcast <2 x i32> %906 to i64, !dbg !54 + %908 = add i64 %900, %907, !dbg !56 + tail call void asm sideeffect "@$2 st.shared.b64 [ $0 + 0 ], $1;", "r,l,b"(ptr addrspace(3) %883, i64 %908, i1 %829) #5, !dbg !54 + tail call void @llvm.nvvm.barrier0(), !dbg !54 + %909 = load i64, ptr addrspace(3) @global_smem, align 4, !dbg !54 + tail call void @llvm.nvvm.barrier0(), !dbg !59 + %910 = insertelement <1 x i64> undef, i64 %909, i64 0, !dbg !59 + store <1 x i64> %910, ptr addrspace(3) @global_smem, align 8, !dbg !59 + tail call void @llvm.nvvm.barrier0(), !dbg !59 + %911 = load i64, ptr addrspace(3) @global_smem, align 8, !dbg !59 + %912 = getelementptr i64, ptr addrspace(1) %5, i64 %18, !dbg !60 + tail call void asm sideeffect "@$2 st.global.b64 [ $1 + 0 ], { $0 };", "l,l,b"(i64 %911, ptr addrspace(1) %912, i1 %833) #5, !dbg !61 + ret void, !dbg !62 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0 + +; Function Attrs: convergent nocallback nounwind +declare void @llvm.nvvm.barrier0() #1 + +; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite) +declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #2 + +; Function Attrs: alwaysinline nounwind +define float @__nv_logf(float %a) local_unnamed_addr #3 { +__nv_fmaf_rn.exit10.i: + %0 = fcmp olt float %a, 0x3810000000000000 + %1 = fmul float %a, 0x4160000000000000 + %.02 = select i1 %0, float %1, float %a + %i.i.0 = select i1 %0, float -2.300000e+01, float 0.000000e+00 + %2 = bitcast float %.02 to i32 + %3 = add i32 %2, -1059760811 + %4 = and i32 %3, -8388608 + %5 = sub i32 %2, %4 + %6 = bitcast i32 %5 to float + %7 = sitofp i32 %4 to float + %8 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not = icmp eq i32 %8, 0 + %9 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %7, float 0x3E80000000000000, float %i.i.0) #5 + %10 = tail call float @llvm.nvvm.fma.rn.f(float %7, float 0x3E80000000000000, float %i.i.0) #5 + %.08 = select i1 %.not, float %10, float %9 + %11 = fadd float %6, -1.000000e+00 + %12 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not1 = icmp eq i32 %12, 0 + %13 = tail call float @llvm.nvvm.fma.rn.ftz.f(float 0xBFC0AA04E0000000, float %11, float 0x3FC2073EC0000000) #5 + %14 = tail call float @llvm.nvvm.fma.rn.f(float 0xBFC0AA04E0000000, float %11, float 0x3FC2073EC0000000) #5 + %.010 = select i1 %.not1, float %14, float %13 + %15 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not2 = icmp eq i32 %15, 0 + %16 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.010, float %11, float 0xBFBF19B980000000) #5 + %17 = tail call float @llvm.nvvm.fma.rn.f(float %.010, float %11, float 0xBFBF19B980000000) #5 + %.011 = select i1 %.not2, float %17, float %16 + %18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not3 = icmp eq i32 %18, 0 + %19 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.011, float %11, float 0x3FC1E52AA0000000) #5 + %20 = tail call float @llvm.nvvm.fma.rn.f(float %.011, float %11, float 0x3FC1E52AA0000000) #5 + %.012 = select i1 %.not3, float %20, float %19 + %21 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not4 = icmp eq i32 %21, 0 + %22 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.012, float %11, float 0xBFC55B1720000000) #5 + %23 = tail call float @llvm.nvvm.fma.rn.f(float %.012, float %11, float 0xBFC55B1720000000) #5 + %.09 = select i1 %.not4, float %23, float %22 + %24 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not5 = icmp eq i32 %24, 0 + %25 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.09, float %11, float 0x3FC99DA160000000) #5 + %26 = tail call float @llvm.nvvm.fma.rn.f(float %.09, float %11, float 0x3FC99DA160000000) #5 + %.05 = select i1 %.not5, float %26, float %25 + %27 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not6 = icmp eq i32 %27, 0 + %28 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05, float %11, float 0xBFCFFFE440000000) #5 + %29 = tail call float @llvm.nvvm.fma.rn.f(float %.05, float %11, float 0xBFCFFFE440000000) #5 + %.01 = select i1 %.not6, float %29, float %28 + %30 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not7 = icmp eq i32 %30, 0 + %31 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.01, float %11, float 0x3FD5554F00000000) #5 + %32 = tail call float @llvm.nvvm.fma.rn.f(float %.01, float %11, float 0x3FD5554F00000000) #5 + %.0 = select i1 %.not7, float %32, float %31 + %33 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not8 = icmp eq i32 %33, 0 + %34 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.0, float %11, float -5.000000e-01) #5 + %35 = tail call float @llvm.nvvm.fma.rn.f(float %.0, float %11, float -5.000000e-01) #5 + %.07 = select i1 %.not8, float %35, float %34 + %36 = fmul float %11, %.07 + %37 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not9 = icmp eq i32 %37, 0 + %38 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %36, float %11, float %11) #5 + %39 = tail call float @llvm.nvvm.fma.rn.f(float %36, float %11, float %11) #5 + %.06 = select i1 %.not9, float %39, float %38 + %40 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not10 = icmp eq i32 %40, 0 + %41 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.08, float 0x3FE62E4300000000, float %.06) #5 + %42 = tail call float @llvm.nvvm.fma.rn.f(float %.08, float 0x3FE62E4300000000, float %.06) #5 + %.04 = select i1 %.not10, float %42, float %41 + %43 = icmp ugt i32 %2, 2139095039 + br i1 %43, label %__nv_fmaf_rn.exit.i, label %__internal_accurate_logf.exit + +__nv_fmaf_rn.exit.i: ; preds = %__nv_fmaf_rn.exit10.i + %44 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #5 + %.not11 = icmp eq i32 %44, 0 + %45 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02, float 0x7FF0000000000000, float 0x7FF0000000000000) #5 + %46 = tail call float @llvm.nvvm.fma.rn.f(float %.02, float 0x7FF0000000000000, float 0x7FF0000000000000) #5 + %.03 = select i1 %.not11, float %46, float %45 + br label %__internal_accurate_logf.exit + +__internal_accurate_logf.exit: ; preds = %__nv_fmaf_rn.exit.i, %__nv_fmaf_rn.exit10.i + %r.i.0 = phi float [ %.03, %__nv_fmaf_rn.exit.i ], [ %.04, %__nv_fmaf_rn.exit10.i ] + %47 = fcmp oeq float %.02, 0.000000e+00 + %r.i.1 = select i1 %47, float 0xFFF0000000000000, float %r.i.0 + ret float %r.i.1 +} + +declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float) #0 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare float @llvm.nvvm.fma.rn.f(float, float, float) #0 + +attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #1 = { convergent nocallback nounwind } +attributes #2 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) } +attributes #3 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} +!nvvm.annotations = !{!4, !5, !5, !4} +!llvm.ident = !{!6} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1} +!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!3 = !DIFile(filename: "cnshxlw3p7kytog7ihat33cfh5n4z4tq3l77zyi5jxajo5uonq7m.py", directory: "/tmp/torchinductor_root/ns") +!4 = !{ptr @triton__0d1d2d3d4d5d6e7de, !"kernel", i32 1} +!5 = !{ptr @triton__0d1d2d3d4d5d6e7de, !"maxntidx", i32 256} +!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} +!7 = distinct !DISubprogram(name: "triton__0d1d2d3d4d5d6e7de", linkageName: "triton__0d1d2d3d4d5d6e7de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!8 = !DISubroutineType(cc: DW_CC_normal, types: !9) +!9 = !{} +!10 = !DILocation(line: 24, column: 33, scope: !7) +!11 = !DILocation(line: 21, column: 28, scope: !7) +!12 = !DILocation(line: 21, column: 34, scope: !7) +!13 = !DILocation(line: 23, column: 21, scope: !7) +!14 = !DILocation(line: 32, column: 45, scope: !7) +!15 = !DILocation(line: 43, column: 65, scope: !7) +!16 = !DILocation(line: 53, column: 48, scope: !7) +!17 = !DILocation(line: 28, column: 36, scope: !7) +!18 = !DILocation(line: 29, column: 27, scope: !7) +!19 = !DILocation(line: 30, column: 25, scope: !7) +!20 = !DILocation(line: 32, column: 40, scope: !7) +!21 = !DILocation(line: 32, column: 34, scope: !7) +!22 = !DILocation(line: 32, column: 59, scope: !7) +!23 = !DILocation(line: 32, column: 51, scope: !7) +!24 = !DILocation(line: 33, column: 35, scope: !7) +!25 = !DILocation(line: 33, column: 52, scope: !7) +!26 = !DILocation(line: 34, column: 35, scope: !7) +!27 = !DILocation(line: 34, column: 52, scope: !7) +!28 = !DILocation(line: 36, column: 23, scope: !7) +!29 = !DILocation(line: 38, column: 36, scope: !7) +!30 = !DILocation(line: 42, column: 40, scope: !7) +!31 = !DILocation(line: 39, column: 22, scope: !7) +!32 = !DILocation(line: 40, column: 22, scope: !7) +!33 = !DILocation(line: 41, column: 36, scope: !7) +!34 = !DILocation(line: 42, column: 55, scope: !7) +!35 = !DILocation(line: 43, column: 48, scope: !7) +!36 = !DILocation(line: 43, column: 34, scope: !7) +!37 = !DILocation(line: 43, column: 71, scope: !7) +!38 = !DILocation(line: 43, column: 130, scope: !7) +!39 = !DILocation(line: 45, column: 23, scope: !7) +!40 = !DILocation(line: 46, column: 23, scope: !7) +!41 = !DILocation(line: 48, column: 17, scope: !7) +!42 = !DILocation(line: 50, column: 38, scope: !7) +!43 = !DILocation(line: 57, column: 48, scope: !7) +!44 = !DILocation(line: 243, column: 36, scope: !45, inlinedAt: !47) +!45 = distinct !DILexicalBlockFile(scope: !7, file: !46, discriminator: 0) +!46 = !DIFile(filename: "standard.py", directory: "/usr/local/lib/python3.10/dist-packages/triton/language") +!47 = !DILocation(line: 58, column: 27, scope: !45) +!48 = !DILocation(line: 233, column: 15, scope: !49, inlinedAt: !50) +!49 = distinct !DILexicalBlockFile(scope: !45, file: !46, discriminator: 0) +!50 = !DILocation(line: 243, column: 36, scope: !49, inlinedAt: !51) +!51 = !DILocation(line: 58, column: 27, scope: !49) +!52 = !DILocation(line: 59, column: 25, scope: !7) +!53 = !DILocation(line: 59, column: 37, scope: !7) +!54 = !DILocation(line: 243, column: 36, scope: !45, inlinedAt: !55) +!55 = !DILocation(line: 60, column: 27, scope: !45) +!56 = !DILocation(line: 233, column: 15, scope: !49, inlinedAt: !57) +!57 = !DILocation(line: 243, column: 36, scope: !49, inlinedAt: !58) +!58 = !DILocation(line: 60, column: 27, scope: !49) +!59 = !DILocation(line: 60, column: 30, scope: !7) +!60 = !DILocation(line: 61, column: 25, scope: !7) +!61 = !DILocation(line: 61, column: 37, scope: !7) +!62 = !DILocation(line: 61, column: 4, scope: !7)