|
|
|
; Module name recorded by the producer of this IR. |
source_filename = "LLVMDialectModule" |
|
|
|
; NUL-terminated string "__CUDA_FTZ" (11 bytes); it is the argument of every |
; @__nvvm_reflect call in this module. |
@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1 |
|
|
|
; Kernel @triton__0d1d2de(%0, %1, %2) |
;   %0 : global (addrspace 1) pointer; read, then overwritten in place |
;   %1 : global (addrspace 1) pointer; read only |
;   %2 : i32; not referenced anywhere in the body |
; Each thread loads one 32-bit word (two 16-bit elements, widened with |
; cvt.f32.bf16) from both buffers at the same element index and stores |
;   a * (0.5 * (erf(b * C1) + 1) + b * (C3 * ex2(b * b * -0.5 * C2))) |
; back to %0 as bf16, where a comes from %0, b from %1, and C1/C2/C3 are the |
; float constants annotated at their use sites below. |
; NOTE(review): this has the shape of a GELU-gradient expression - confirm |
; against the Python source named in the debug info. |
; Both loads and the store use a constant-true predicate, so no bounds check |
; against %2 happens in this function. |
define void @triton__0d1d2de(ptr addrspace(1) %0, ptr addrspace(1) %1, i32 %2) local_unnamed_addr !dbg !7 { |
|
; Per-thread offset inside the block: (tid.x * 2) & 510, i.e. an even value |
; in 0..510, so each thread owns two consecutive elements. |
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10 |
|
%5 = shl i32 %4, 1, !dbg !10 |
|
%6 = and i32 %5, 510, !dbg !10 |
|
; Block index read through inline PTX, scaled by 512 (shl 9) and combined |
; with the per-thread offset to form the element index %9. |
%7 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #4, !dbg !11 |
|
%8 = shl i32 %7, 9, !dbg !12 |
|
%9 = or i32 %8, %6, !dbg !13 |
|
%10 = sext i32 %9 to i64, !dbg !14 |
|
; Address of the element pair in %0 (16-bit element stride). |
%11 = getelementptr i16, ptr addrspace(1) %0, i64 %10, !dbg !14 |
|
; Zero the destination register, then do a predicated 32-bit global load. |
%12 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %11, i1 true) #4, !dbg !15 |
|
; Split the loaded word into its low (%13) and high (%14) 16-bit halves. |
%13 = trunc i32 %12 to i16, !dbg !15 |
|
%extelt.offset = lshr i32 %12, 16, !dbg !15 |
|
%14 = trunc i32 %extelt.offset to i16, !dbg !15 |
|
; Widen both halves from bf16 to f32: %15, %16 are the "a" values. |
%15 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %13) #4, !dbg !16 |
|
%16 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %14) #4, !dbg !16 |
|
; Same load / split / widen sequence for %1: %21, %22 are the "b" values. |
%17 = getelementptr i16, ptr addrspace(1) %1, i64 %10, !dbg !17 |
|
%18 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %17, i1 true) #4, !dbg !18 |
|
%19 = trunc i32 %18 to i16, !dbg !18 |
|
%extelt.offset1 = lshr i32 %18, 16, !dbg !18 |
|
%20 = trunc i32 %extelt.offset1 to i16, !dbg !18 |
|
%21 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %19) #4, !dbg !19 |
|
%22 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %20) #4, !dbg !19 |
|
; C1 = 0x3FE6A09E60000000 (about 0.70710678, i.e. close to 1/sqrt(2)). |
%23 = fmul float %21, 0x3FE6A09E60000000, !dbg !20 |
|
%24 = fmul float %22, 0x3FE6A09E60000000, !dbg !20 |
|
; ---- erf(%23): same instruction sequence as the body of @__nv_erff ---- |
; Each @__nvvm_reflect("__CUDA_FTZ") result drives a select that takes the |
; .ftz intrinsic result when the value is non-zero and the plain one when |
; it is zero. |
%25 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not.i = icmp eq i32 %25, 0, !dbg !21 |
|
%26 = tail call float @llvm.nvvm.fabs.ftz.f(float %23) #4, !dbg !21 |
|
%27 = tail call float @llvm.nvvm.fabs.f(float %23) #4, !dbg !21 |
|
%.0.i = select i1 %.not.i, float %27, float %26, !dbg !21 |
|
; %28 is true when |x| >= 0x3FF00C1FC0000000 (about 1.00296); it selects the |
; coefficient set and the polynomial variable below. |
%28 = fcmp oge float %.0.i, 0x3FF00C1FC0000000, !dbg !21 |
|
br i1 %28, label %__nv_fabsf.exit1.i, label %30, !dbg !21 |
|
|
|
; Large-|x| path: the polynomial variable is |x|. |
__nv_fabsf.exit1.i: |
|
%29 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not1.i = icmp eq i32 %29, 0, !dbg !21 |
|
%.01.i = select i1 %.not1.i, float %27, float %26, !dbg !21 |
|
br label %__internal_fmad.exit.i, !dbg !21 |
|
|
|
; Small-|x| path: the polynomial variable is x * x. |
30: |
|
%31 = fmul float %23, %23, !dbg !21 |
|
br label %__internal_fmad.exit.i, !dbg !21 |
|
|
|
; Join: %32..%38 are the seven per-path coefficients, %39 is the variable t. |
__internal_fmad.exit.i: |
|
%32 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1.i ], [ 0x3FC06EBA60000000, %30 ], !dbg !21 |
|
%33 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1.i ], [ 0xBFD8127580000000, %30 ], !dbg !21 |
|
%34 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1.i ], [ 0x3FBCE315E0000000, %30 ], !dbg !21 |
|
%35 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1.i ], [ 0xBF9B837CE0000000, %30 ], !dbg !21 |
|
%36 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1.i ], [ 0x3F755ABD40000000, %30 ], !dbg !21 |
|
%37 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1.i ], [ 0xBF4AE9A400000000, %30 ], !dbg !21 |
|
%38 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1.i ], [ 0x3F163D2D40000000, %30 ], !dbg !21 |
|
%39 = phi float [ %.01.i, %__nv_fabsf.exit1.i ], [ %31, %30 ], !dbg !21 |
|
; Six chained fma steps p = fma(p, t, c), starting from %38 and consuming |
; %37, %36, %35, %34, %33, %32 in that order. |
%40 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not2.i = icmp eq i32 %40, 0, !dbg !21 |
|
%41 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %38, float %39, float %37) #4, !dbg !21 |
|
%42 = tail call float @llvm.nvvm.fma.rn.f(float %38, float %39, float %37) #4, !dbg !21 |
|
%.02.i = select i1 %.not2.i, float %42, float %41, !dbg !21 |
|
%43 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not3.i = icmp eq i32 %43, 0, !dbg !21 |
|
%44 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i, float %39, float %36) #4, !dbg !21 |
|
%45 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i, float %39, float %36) #4, !dbg !21 |
|
%.03.i = select i1 %.not3.i, float %45, float %44, !dbg !21 |
|
%46 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not4.i = icmp eq i32 %46, 0, !dbg !21 |
|
%47 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03.i, float %39, float %35) #4, !dbg !21 |
|
%48 = tail call float @llvm.nvvm.fma.rn.f(float %.03.i, float %39, float %35) #4, !dbg !21 |
|
%.04.i = select i1 %.not4.i, float %48, float %47, !dbg !21 |
|
%49 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not5.i = icmp eq i32 %49, 0, !dbg !21 |
|
%50 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04.i, float %39, float %34) #4, !dbg !21 |
|
%51 = tail call float @llvm.nvvm.fma.rn.f(float %.04.i, float %39, float %34) #4, !dbg !21 |
|
%.05.i = select i1 %.not5.i, float %51, float %50, !dbg !21 |
|
%52 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not6.i = icmp eq i32 %52, 0, !dbg !21 |
|
%53 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i, float %39, float %33) #4, !dbg !21 |
|
%54 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i, float %39, float %33) #4, !dbg !21 |
|
%.06.i = select i1 %.not6.i, float %54, float %53, !dbg !21 |
|
%55 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not7.i = icmp eq i32 %55, 0, !dbg !21 |
|
%56 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06.i, float %39, float %32) #4, !dbg !21 |
|
%57 = tail call float @llvm.nvvm.fma.rn.f(float %.06.i, float %39, float %32) #4, !dbg !21 |
|
%.07.i = select i1 %.not7.i, float %57, float %56, !dbg !21 |
|
; Final step fma(p, u, u) with u = -t on the large path and u = x otherwise. |
%58 = fneg float %39, !dbg !21 |
|
%59 = select i1 %28, float %58, float %23, !dbg !21 |
|
%60 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not8.i = icmp eq i32 %60, 0, !dbg !21 |
|
%61 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07.i, float %59, float %59) #4, !dbg !21 |
|
%62 = tail call float @llvm.nvvm.fma.rn.f(float %.07.i, float %59, float %59) #4, !dbg !21 |
|
%.08.i = select i1 %.not8.i, float %62, float %61, !dbg !21 |
|
; The small path is finished here; the large path needs the fix-up below. |
br i1 %28, label %63, label %__nv_erff.exit, !dbg !21 |
|
|
|
; Large path fix-up: 1 - ex2(value), then OR in the sign bit of x. |
63: |
|
%64 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08.i) #4, !dbg !21 |
|
%65 = fsub float 1.000000e+00, %64, !dbg !21 |
|
%66 = bitcast float %65 to i32, !dbg !21 |
|
%67 = bitcast float %23 to i32, !dbg !21 |
|
%68 = and i32 %67, -2147483648, !dbg !21 |
|
%69 = or i32 %68, %66, !dbg !21 |
|
%70 = bitcast i32 %69 to float, !dbg !21 |
|
br label %__nv_erff.exit, !dbg !21 |
|
|
|
; %r.0.i is the erf result for the first element. |
__nv_erff.exit: |
|
%r.0.i = phi float [ %70, %63 ], [ %.08.i, %__internal_fmad.exit.i ], !dbg !21 |
|
; ---- erf(%24): the same sequence again for the second element ---- |
%71 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not.i2 = icmp eq i32 %71, 0, !dbg !21 |
|
%72 = tail call float @llvm.nvvm.fabs.ftz.f(float %24) #4, !dbg !21 |
|
%73 = tail call float @llvm.nvvm.fabs.f(float %24) #4, !dbg !21 |
|
%.0.i3 = select i1 %.not.i2, float %73, float %72, !dbg !21 |
|
%74 = fcmp oge float %.0.i3, 0x3FF00C1FC0000000, !dbg !21 |
|
br i1 %74, label %__nv_fabsf.exit1.i20, label %76, !dbg !21 |
|
|
|
; Large-|x| path for the second element. |
__nv_fabsf.exit1.i20: |
|
%75 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not1.i21 = icmp eq i32 %75, 0, !dbg !21 |
|
%.01.i22 = select i1 %.not1.i21, float %73, float %72, !dbg !21 |
|
br label %__internal_fmad.exit.i4, !dbg !21 |
|
|
|
; Small-|x| path for the second element. |
76: |
|
%77 = fmul float %24, %24, !dbg !21 |
|
br label %__internal_fmad.exit.i4, !dbg !21 |
|
|
|
; Join: coefficients %78..%84 and polynomial variable %85. |
__internal_fmad.exit.i4: |
|
%78 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1.i20 ], [ 0x3FC06EBA60000000, %76 ], !dbg !21 |
|
%79 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1.i20 ], [ 0xBFD8127580000000, %76 ], !dbg !21 |
|
%80 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1.i20 ], [ 0x3FBCE315E0000000, %76 ], !dbg !21 |
|
%81 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1.i20 ], [ 0xBF9B837CE0000000, %76 ], !dbg !21 |
|
%82 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1.i20 ], [ 0x3F755ABD40000000, %76 ], !dbg !21 |
|
%83 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1.i20 ], [ 0xBF4AE9A400000000, %76 ], !dbg !21 |
|
%84 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1.i20 ], [ 0x3F163D2D40000000, %76 ], !dbg !21 |
|
%85 = phi float [ %.01.i22, %__nv_fabsf.exit1.i20 ], [ %77, %76 ], !dbg !21 |
|
%86 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not2.i5 = icmp eq i32 %86, 0, !dbg !21 |
|
%87 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %84, float %85, float %83) #4, !dbg !21 |
|
%88 = tail call float @llvm.nvvm.fma.rn.f(float %84, float %85, float %83) #4, !dbg !21 |
|
%.02.i6 = select i1 %.not2.i5, float %88, float %87, !dbg !21 |
|
%89 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not3.i7 = icmp eq i32 %89, 0, !dbg !21 |
|
%90 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i6, float %85, float %82) #4, !dbg !21 |
|
%91 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i6, float %85, float %82) #4, !dbg !21 |
|
%.03.i8 = select i1 %.not3.i7, float %91, float %90, !dbg !21 |
|
%92 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not4.i9 = icmp eq i32 %92, 0, !dbg !21 |
|
%93 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03.i8, float %85, float %81) #4, !dbg !21 |
|
%94 = tail call float @llvm.nvvm.fma.rn.f(float %.03.i8, float %85, float %81) #4, !dbg !21 |
|
%.04.i10 = select i1 %.not4.i9, float %94, float %93, !dbg !21 |
|
%95 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not5.i11 = icmp eq i32 %95, 0, !dbg !21 |
|
%96 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04.i10, float %85, float %80) #4, !dbg !21 |
|
%97 = tail call float @llvm.nvvm.fma.rn.f(float %.04.i10, float %85, float %80) #4, !dbg !21 |
|
%.05.i12 = select i1 %.not5.i11, float %97, float %96, !dbg !21 |
|
%98 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not6.i13 = icmp eq i32 %98, 0, !dbg !21 |
|
%99 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i12, float %85, float %79) #4, !dbg !21 |
|
%100 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i12, float %85, float %79) #4, !dbg !21 |
|
%.06.i14 = select i1 %.not6.i13, float %100, float %99, !dbg !21 |
|
%101 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not7.i15 = icmp eq i32 %101, 0, !dbg !21 |
|
%102 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06.i14, float %85, float %78) #4, !dbg !21 |
|
%103 = tail call float @llvm.nvvm.fma.rn.f(float %.06.i14, float %85, float %78) #4, !dbg !21 |
|
%.07.i16 = select i1 %.not7.i15, float %103, float %102, !dbg !21 |
|
%104 = fneg float %85, !dbg !21 |
|
%105 = select i1 %74, float %104, float %24, !dbg !21 |
|
%106 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21 |
|
%.not8.i17 = icmp eq i32 %106, 0, !dbg !21 |
|
%107 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07.i16, float %105, float %105) #4, !dbg !21 |
|
%108 = tail call float @llvm.nvvm.fma.rn.f(float %.07.i16, float %105, float %105) #4, !dbg !21 |
|
%.08.i18 = select i1 %.not8.i17, float %108, float %107, !dbg !21 |
|
br i1 %74, label %109, label %__nv_erff.exit23, !dbg !21 |
|
|
|
; Large path fix-up for the second element. |
109: |
|
%110 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08.i18) #4, !dbg !21 |
|
%111 = fsub float 1.000000e+00, %110, !dbg !21 |
|
%112 = bitcast float %111 to i32, !dbg !21 |
|
%113 = bitcast float %24 to i32, !dbg !21 |
|
%114 = and i32 %113, -2147483648, !dbg !21 |
|
%115 = or i32 %114, %112, !dbg !21 |
|
%116 = bitcast i32 %115 to float, !dbg !21 |
|
br label %__nv_erff.exit23, !dbg !21 |
|
|
|
; %r.0.i19 is the erf result for the second element. |
__nv_erff.exit23: |
|
%r.0.i19 = phi float [ %116, %109 ], [ %.08.i18, %__internal_fmad.exit.i4 ], !dbg !21 |
|
; First term: (erf + 1) * 0.5. |
%117 = fadd float %r.0.i, 1.000000e+00, !dbg !22 |
|
%118 = fadd float %r.0.i19, 1.000000e+00, !dbg !22 |
|
%119 = fmul float %117, 5.000000e-01, !dbg !23 |
|
%120 = fmul float %118, 5.000000e-01, !dbg !23 |
|
; Second term, built from b: b * b * -0.5 ... |
%121 = fmul float %21, %21, !dbg !24 |
|
%122 = fmul float %22, %22, !dbg !24 |
|
%123 = fmul float %121, -5.000000e-01, !dbg !25 |
|
%124 = fmul float %122, -5.000000e-01, !dbg !25 |
|
; ... scaled by C2 = 0x3FF7154760000000 (about 1.442695, close to log2(e)) |
; and fed to the approximate base-2 exponential ex2.approx.f32. |
%125 = fmul float %123, 0x3FF7154760000000, !dbg !26 |
|
%126 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %125) #4, !dbg !26 |
|
%127 = fmul float %124, 0x3FF7154760000000, !dbg !26 |
|
%128 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %127) #4, !dbg !26 |
|
; C3 = 0x3FD9884540000000 (about 0.39894228, close to 1/sqrt(2*pi)). |
%129 = fmul float %126, 0x3FD9884540000000, !dbg !27 |
|
%130 = fmul float %128, 0x3FD9884540000000, !dbg !27 |
|
%131 = fmul float %21, %129, !dbg !28 |
|
%132 = fmul float %22, %130, !dbg !28 |
|
; Sum of both terms, then multiply by the values loaded from %0. |
%133 = fadd float %119, %131, !dbg !29 |
|
%134 = fadd float %120, %132, !dbg !29 |
|
%135 = fmul float %15, %133, !dbg !30 |
|
%136 = fmul float %16, %134, !dbg !30 |
|
; Narrow back to bf16 (cvt.rn), pack the two halves into one 32-bit word. |
%137 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %135) #4, !dbg !31 |
|
%138 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %136) #4, !dbg !31 |
|
%139 = insertelement <2 x i16> undef, i16 %137, i64 0, !dbg !31 |
|
%140 = insertelement <2 x i16> %139, i16 %138, i64 1, !dbg !31 |
|
%141 = bitcast <2 x i16> %140 to i32, !dbg !31 |
|
; Predicated 32-bit global store to %11, the address the "a" pair came from. |
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %141, ptr addrspace(1) %11, i1 true) #4, !dbg !31 |
|
ret void, !dbg !32 |
|
} |
|
|
|
|
|
; Intrinsic the kernel calls once to obtain the x thread index. |
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0 |
|
|
|
|
|
; float @__nv_erff(float %a) |
; Evaluates a two-range approximation on %a (the name suggests erf): |
;   |a| <  T : p(a*a) * a + a |
;   |a| >= T : 1 - ex2(p(|a|) * (-|a|) + (-|a|)), with the sign bit of %a |
;              OR-ed into the result |
; where T = 0x3FF00C1FC0000000 (about 1.00296) and p is a degree-6 |
; polynomial evaluated with chained fma whose coefficients depend on the |
; range. Every fabs/fma is computed in both .ftz and non-.ftz form; the |
; select after each @__nvvm_reflect("__CUDA_FTZ") call keeps the .ftz result |
; when the reflected value is non-zero. |
; Attribute group #1 marks the function alwaysinline. |
define float @__nv_erff(float %a) local_unnamed_addr #1 { |
|
; Entry: compute |a| and compare it against the range threshold. |
__nv_fabsf.exit: |
|
%0 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not = icmp eq i32 %0, 0 |
|
%1 = tail call float @llvm.nvvm.fabs.ftz.f(float %a) #4 |
|
%2 = tail call float @llvm.nvvm.fabs.f(float %a) #4 |
|
%.0 = select i1 %.not, float %2, float %1 |
|
%3 = fcmp oge float %.0, 0x3FF00C1FC0000000 |
|
br i1 %3, label %__nv_fabsf.exit1, label %5 |
|
|
|
; Large range: the polynomial variable is |a|. |
__nv_fabsf.exit1: |
|
%4 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not1 = icmp eq i32 %4, 0 |
|
%.01 = select i1 %.not1, float %2, float %1 |
|
br label %__internal_fmad.exit |
|
|
|
; Small range: the polynomial variable is a * a. |
5: |
|
%6 = fmul float %a, %a |
|
br label %__internal_fmad.exit |
|
|
|
; Join: %7..%13 are the range-specific coefficients, %14 is the variable t. |
__internal_fmad.exit: |
|
%7 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1 ], [ 0x3FC06EBA60000000, %5 ] |
|
%8 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1 ], [ 0xBFD8127580000000, %5 ] |
|
%9 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1 ], [ 0x3FBCE315E0000000, %5 ] |
|
%10 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1 ], [ 0xBF9B837CE0000000, %5 ] |
|
%11 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1 ], [ 0x3F755ABD40000000, %5 ] |
|
%12 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1 ], [ 0xBF4AE9A400000000, %5 ] |
|
%13 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1 ], [ 0x3F163D2D40000000, %5 ] |
|
%14 = phi float [ %.01, %__nv_fabsf.exit1 ], [ %6, %5 ] |
|
; Six chained steps p = fma(p, t, c), starting from %13 and consuming |
; %12, %11, %10, %9, %8, %7 in that order. |
%15 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not2 = icmp eq i32 %15, 0 |
|
%16 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %13, float %14, float %12) #4 |
|
%17 = tail call float @llvm.nvvm.fma.rn.f(float %13, float %14, float %12) #4 |
|
%.02 = select i1 %.not2, float %17, float %16 |
|
%18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not3 = icmp eq i32 %18, 0 |
|
%19 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02, float %14, float %11) #4 |
|
%20 = tail call float @llvm.nvvm.fma.rn.f(float %.02, float %14, float %11) #4 |
|
%.03 = select i1 %.not3, float %20, float %19 |
|
%21 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not4 = icmp eq i32 %21, 0 |
|
%22 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03, float %14, float %10) #4 |
|
%23 = tail call float @llvm.nvvm.fma.rn.f(float %.03, float %14, float %10) #4 |
|
%.04 = select i1 %.not4, float %23, float %22 |
|
%24 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not5 = icmp eq i32 %24, 0 |
|
%25 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04, float %14, float %9) #4 |
|
%26 = tail call float @llvm.nvvm.fma.rn.f(float %.04, float %14, float %9) #4 |
|
%.05 = select i1 %.not5, float %26, float %25 |
|
%27 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not6 = icmp eq i32 %27, 0 |
|
%28 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05, float %14, float %8) #4 |
|
%29 = tail call float @llvm.nvvm.fma.rn.f(float %.05, float %14, float %8) #4 |
|
%.06 = select i1 %.not6, float %29, float %28 |
|
%30 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not7 = icmp eq i32 %30, 0 |
|
%31 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06, float %14, float %7) #4 |
|
%32 = tail call float @llvm.nvvm.fma.rn.f(float %.06, float %14, float %7) #4 |
|
%.07 = select i1 %.not7, float %32, float %31 |
|
; Final step fma(p, u, u) with u = -t in the large range and u = a otherwise. |
%33 = fneg float %14 |
|
%34 = select i1 %3, float %33, float %a |
|
%35 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4 |
|
%.not8 = icmp eq i32 %35, 0 |
|
%36 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07, float %34, float %34) #4 |
|
%37 = tail call float @llvm.nvvm.fma.rn.f(float %.07, float %34, float %34) #4 |
|
%.08 = select i1 %.not8, float %37, float %36 |
|
; The small range returns %.08 directly; the large range is fixed up first. |
br i1 %3, label %38, label %46 |
|
|
|
; Large-range fix-up: 1 - ex2(%.08), then OR in the sign bit of %a. |
38: |
|
%39 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08) #4 |
|
%40 = fsub float 1.000000e+00, %39 |
|
%41 = bitcast float %40 to i32 |
|
%42 = bitcast float %a to i32 |
|
%43 = and i32 %42, -2147483648 |
|
%44 = or i32 %43, %41 |
|
%45 = bitcast i32 %44 to float |
|
br label %46 |
|
|
|
; Common exit: merge the two range results and return. |
46: |
|
%r.0 = phi float [ %45, %38 ], [ %.08, %__internal_fmad.exit ] |
|
ret float %r.0 |
|
} |
|
|
|
; Compile-time query hook; in this module it is always called with @.str |
; ("__CUDA_FTZ") and its result is compared against 0. |
declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #2 |
|
|
|
|
|
; Absolute value, .ftz variant. |
declare float @llvm.nvvm.fabs.ftz.f(float) #0 |
|
|
|
|
|
; Absolute value, non-.ftz variant. |
declare float @llvm.nvvm.fabs.f(float) #0 |
|
|
|
|
|
; Fused multiply-add, .rn rounding, .ftz variant. |
declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float) #0 |
|
|
|
|
|
; Fused multiply-add, .rn rounding, non-.ftz variant. |
declare float @llvm.nvvm.fma.rn.f(float, float, float) #0 |
|
|
|
|
|
; Approximate base-2 exponential, .ftz variant; it uses attribute group #3, |
; which is #0 without speculatable. |
declare float @llvm.nvvm.ex2.approx.ftz.f(float) #3 |
|
|
|
; #0: tid.x, fabs and fma intrinsic declarations (memory(none), speculatable). |
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
|
; #1: @__nv_erff definition; alwaysinline plus string attributes. |
attributes #1 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } |
|
; #2: @__nvvm_reflect declaration; string attributes only. |
attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } |
|
; #3: ex2.approx.ftz declaration; same as #0 minus speculatable. |
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) } |
|
; #4: attached to the inline-asm, reflect and math intrinsic call sites. |
attributes #4 = { nounwind } |
|
|
|
; Named metadata. Note that !4 and !5 each appear twice in !nvvm.annotations. |
!llvm.module.flags = !{!0, !1} |
|
!llvm.dbg.cu = !{!2} |
|
!nvvm.annotations = !{!4, !5, !5, !4} |
|
!llvm.ident = !{!6} |
|
|
|
; Module flags: debug-info version 3, and "nvvm-reflect-ftz" set to 1. |
; NOTE(review): presumably the latter is what resolves the __CUDA_FTZ |
; reflect queries to the .ftz variants - confirm in the NVPTX documentation. |
!0 = !{i32 2, !"Debug Info Version", i32 3} |
|
!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1} |
|
; Compile unit and the source file that all debug locations refer to. |
!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) |
|
!3 = !DIFile(filename: "c5jxaguxho3nhrlt5vcinnz5fevodumlpwn4wyb2vx3xrveicerl.py", directory: "/tmp/torchinductor_root/5j") |
|
; Annotations on @triton__0d1d2de: "kernel" = 1 and "maxntidx" = 256. |
!4 = !{ptr @triton__0d1d2de, !"kernel", i32 1} |
|
!5 = !{ptr @triton__0d1d2de, !"maxntidx", i32 256} |
|
; Identification string referenced by !llvm.ident. |
!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} |
|
; Subprogram for the kernel (source line 18), with an empty type list. |
!7 = distinct !DISubprogram(name: "triton__0d1d2de", linkageName: "triton__0d1d2de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) |
|
!8 = !DISubroutineType(cc: DW_CC_normal, types: !9) |
|
!9 = !{} |
|
; Source locations (line/column in the file named by !3) attached to the |
; kernel's instructions; !21 (line 30) covers both erf sequences. |
!10 = !DILocation(line: 21, column: 36, scope: !7) |
|
!11 = !DILocation(line: 20, column: 28, scope: !7) |
|
!12 = !DILocation(line: 20, column: 33, scope: !7) |
|
!13 = !DILocation(line: 21, column: 23, scope: !7) |
|
!14 = !DILocation(line: 24, column: 34, scope: !7) |
|
!15 = !DILocation(line: 24, column: 39, scope: !7) |
|
!16 = !DILocation(line: 24, column: 48, scope: !7) |
|
!17 = !DILocation(line: 25, column: 30, scope: !7) |
|
!18 = !DILocation(line: 25, column: 35, scope: !7) |
|
!19 = !DILocation(line: 25, column: 44, scope: !7) |
|
!20 = !DILocation(line: 29, column: 18, scope: !7) |
|
!21 = !DILocation(line: 30, column: 23, scope: !7) |
|
!22 = !DILocation(line: 32, column: 18, scope: !7) |
|
!23 = !DILocation(line: 34, column: 19, scope: !7) |
|
!24 = !DILocation(line: 35, column: 19, scope: !7) |
|
!25 = !DILocation(line: 37, column: 20, scope: !7) |
|
!26 = !DILocation(line: 38, column: 19, scope: !7) |
|
!27 = !DILocation(line: 40, column: 20, scope: !7) |
|
!28 = !DILocation(line: 41, column: 19, scope: !7) |
|
!29 = !DILocation(line: 42, column: 20, scope: !7) |
|
!30 = !DILocation(line: 43, column: 19, scope: !7) |
|
!31 = !DILocation(line: 45, column: 40, scope: !7) |
|
!32 = !DILocation(line: 45, column: 4, scope: !7) |
|
|