; 0-hero's picture
; Add files using upload-large-folder tool
; 9b31431 verified
; raw
; history blame
; 21.5 kB
; ModuleID = 'LLVMDialectModule'
source_filename = "LLVMDialectModule"
; NUL-terminated key string "__CUDA_FTZ" (10 characters + terminator = 11 bytes).
; It is the argument of every @__nvvm_reflect call in this module; the result
; of each such call picks between the .ftz and non-.ftz form of an intrinsic.
@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
; Kernel entry point (tagged "kernel" with "maxntidx" = 256 in
; !nvvm.annotations). Each thread handles two adjacent 16-bit elements that are
; moved with a single 32-bit access, and the block index is shifted left by 9,
; so one block covers 512 elements.
;
; Parameters:
;   %0 - addrspace(1) pointer to bf16 elements; read, then overwritten in place
;        with the result (the final store uses the same address as the first load).
;   %1 - addrspace(1) pointer to bf16 elements; read only.
;   %2 - i32 that is never referenced in the body. Every load/store predicate
;        is the constant `i1 true`, so no bounds masking is performed.
;        NOTE(review): presumably the element count - confirm that the launch
;        grid always covers an exact multiple of 512 elements.
;
; Per element, with a = %0[i] and x = %1[i] widened to f32:
;   %0[i] = bf16( a * ( 0.5 * (erf(x * C1) + 1) + x * C3 * 2^(-0.5 * x*x * C2) ) )
; where C1 = 0x3FE6A09E60000000 (approx. 0.70710677),
;       C2 = 0x3FF7154760000000 (approx. 1.442695),
;       C3 = 0x3FD9884540000000 (approx. 0.3989423).
; NOTE(review): this has the shape of an upstream value multiplied by the
; derivative of erf-based GELU - confirm against the generating Python source.
;
; erf is the body of @__nv_erff inlined twice (labels carrying .i suffixes),
; once per element.
define void @triton__0d1d2de(ptr addrspace(1) %0, ptr addrspace(1) %1, i32 %2) local_unnamed_addr !dbg !7 {
; --- Element index: (ctaid.x << 9) | ((tid.x << 1) & 510) ---
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
%5 = shl i32 %4, 1, !dbg !10
%6 = and i32 %5, 510, !dbg !10
%7 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #4, !dbg !11
%8 = shl i32 %7, 9, !dbg !12
%9 = or i32 %8, %6, !dbg !13
%10 = sext i32 %9 to i64, !dbg !14
; --- Load two bf16 values from %0 as one 32-bit word and widen them to f32 ---
%11 = getelementptr i16, ptr addrspace(1) %0, i64 %10, !dbg !14
; Inline PTX: result is zeroed first, then a predicated 32-bit global load
; (predicate is constant true) fills it.
%12 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %11, i1 true) #4, !dbg !15
; Low half-word = first element, high half-word = second element.
%13 = trunc i32 %12 to i16, !dbg !15
%extelt.offset = lshr i32 %12, 16, !dbg !15
%14 = trunc i32 %extelt.offset to i16, !dbg !15
%15 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %13) #4, !dbg !16
%16 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %14) #4, !dbg !16
; --- Same load/unpack/widen sequence for %1 (x values: %21 and %22) ---
%17 = getelementptr i16, ptr addrspace(1) %1, i64 %10, !dbg !17
%18 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %17, i1 true) #4, !dbg !18
%19 = trunc i32 %18 to i16, !dbg !18
%extelt.offset1 = lshr i32 %18, 16, !dbg !18
%20 = trunc i32 %extelt.offset1 to i16, !dbg !18
%21 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %19) #4, !dbg !19
%22 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %20) #4, !dbg !19
; erf arguments: x scaled by approx. 0.70710677.
%23 = fmul float %21, 0x3FE6A09E60000000, !dbg !20
%24 = fmul float %22, 0x3FE6A09E60000000, !dbg !20
; --- Inlined erf, first element (argument %23) ---
; Pattern used throughout: both the .ftz and the non-.ftz intrinsic are called,
; and the non-.ftz result is selected when @__nvvm_reflect("__CUDA_FTZ") == 0.
%25 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not.i = icmp eq i32 %25, 0, !dbg !21
%26 = tail call float @llvm.nvvm.fabs.ftz.f(float %23) #4, !dbg !21
%27 = tail call float @llvm.nvvm.fabs.f(float %23) #4, !dbg !21
%.0.i = select i1 %.not.i, float %27, float %26, !dbg !21
; %28 is true when |arg| >= 0x3FF00C1FC0000000 (approx. 1.00296); it selects
; the coefficient set, the polynomial variable and the final reconstruction.
%28 = fcmp oge float %.0.i, 0x3FF00C1FC0000000, !dbg !21
br i1 %28, label %__nv_fabsf.exit1.i, label %30, !dbg !21
; Large-magnitude path: polynomial variable is |arg|.
__nv_fabsf.exit1.i: ; preds = %3
%29 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not1.i = icmp eq i32 %29, 0, !dbg !21
%.01.i = select i1 %.not1.i, float %27, float %26, !dbg !21
br label %__internal_fmad.exit.i, !dbg !21
; Small-magnitude path: polynomial variable is arg * arg.
30: ; preds = %3
%31 = fmul float %23, %23, !dbg !21
br label %__internal_fmad.exit.i, !dbg !21
__internal_fmad.exit.i: ; preds = %30, %__nv_fabsf.exit1.i
; %32..%38 are the seven polynomial coefficients for the taken path
; (%38 is consumed first, %32 last); %39 is the polynomial variable t.
%32 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1.i ], [ 0x3FC06EBA60000000, %30 ], !dbg !21
%33 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1.i ], [ 0xBFD8127580000000, %30 ], !dbg !21
%34 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1.i ], [ 0x3FBCE315E0000000, %30 ], !dbg !21
%35 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1.i ], [ 0xBF9B837CE0000000, %30 ], !dbg !21
%36 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1.i ], [ 0x3F755ABD40000000, %30 ], !dbg !21
%37 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1.i ], [ 0xBF4AE9A400000000, %30 ], !dbg !21
%38 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1.i ], [ 0x3F163D2D40000000, %30 ], !dbg !21
%39 = phi float [ %.01.i, %__nv_fabsf.exit1.i ], [ %31, %30 ], !dbg !21
; Six chained FMAs evaluate the polynomial in nested (Horner) form:
;   p = (((((%38*t + %37)*t + %36)*t + %35)*t + %34)*t + %33)*t + %32
%40 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not2.i = icmp eq i32 %40, 0, !dbg !21
%41 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %38, float %39, float %37) #4, !dbg !21
%42 = tail call float @llvm.nvvm.fma.rn.f(float %38, float %39, float %37) #4, !dbg !21
%.02.i = select i1 %.not2.i, float %42, float %41, !dbg !21
%43 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not3.i = icmp eq i32 %43, 0, !dbg !21
%44 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i, float %39, float %36) #4, !dbg !21
%45 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i, float %39, float %36) #4, !dbg !21
%.03.i = select i1 %.not3.i, float %45, float %44, !dbg !21
%46 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not4.i = icmp eq i32 %46, 0, !dbg !21
%47 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03.i, float %39, float %35) #4, !dbg !21
%48 = tail call float @llvm.nvvm.fma.rn.f(float %.03.i, float %39, float %35) #4, !dbg !21
%.04.i = select i1 %.not4.i, float %48, float %47, !dbg !21
%49 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not5.i = icmp eq i32 %49, 0, !dbg !21
%50 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04.i, float %39, float %34) #4, !dbg !21
%51 = tail call float @llvm.nvvm.fma.rn.f(float %.04.i, float %39, float %34) #4, !dbg !21
%.05.i = select i1 %.not5.i, float %51, float %50, !dbg !21
%52 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not6.i = icmp eq i32 %52, 0, !dbg !21
%53 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i, float %39, float %33) #4, !dbg !21
%54 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i, float %39, float %33) #4, !dbg !21
%.06.i = select i1 %.not6.i, float %54, float %53, !dbg !21
%55 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not7.i = icmp eq i32 %55, 0, !dbg !21
%56 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06.i, float %39, float %32) #4, !dbg !21
%57 = tail call float @llvm.nvvm.fma.rn.f(float %.06.i, float %39, float %32) #4, !dbg !21
%.07.i = select i1 %.not7.i, float %57, float %56, !dbg !21
; s = -t on the large-magnitude path, the original argument otherwise;
; the last FMA forms p*s + s.
%58 = fneg float %39, !dbg !21
%59 = select i1 %28, float %58, float %23, !dbg !21
%60 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not8.i = icmp eq i32 %60, 0, !dbg !21
%61 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07.i, float %59, float %59) #4, !dbg !21
%62 = tail call float @llvm.nvvm.fma.rn.f(float %.07.i, float %59, float %59) #4, !dbg !21
%.08.i = select i1 %.not8.i, float %62, float %61, !dbg !21
br i1 %28, label %63, label %__nv_erff.exit, !dbg !21
; Large-magnitude reconstruction: 1 - 2^(p*s + s), then the sign bit of the
; argument (mask -2147483648 = 0x80000000) is OR-ed into the result bits.
63: ; preds = %__internal_fmad.exit.i
%64 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08.i) #4, !dbg !21
%65 = fsub float 1.000000e+00, %64, !dbg !21
%66 = bitcast float %65 to i32, !dbg !21
%67 = bitcast float %23 to i32, !dbg !21
%68 = and i32 %67, -2147483648, !dbg !21
%69 = or i32 %68, %66, !dbg !21
%70 = bitcast i32 %69 to float, !dbg !21
br label %__nv_erff.exit, !dbg !21
__nv_erff.exit: ; preds = %__internal_fmad.exit.i, %63
; %r.0.i = erf result for the first element.
%r.0.i = phi float [ %70, %63 ], [ %.08.i, %__internal_fmad.exit.i ], !dbg !21
; --- Inlined erf, second element (argument %24); same structure as above ---
%71 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not.i2 = icmp eq i32 %71, 0, !dbg !21
%72 = tail call float @llvm.nvvm.fabs.ftz.f(float %24) #4, !dbg !21
%73 = tail call float @llvm.nvvm.fabs.f(float %24) #4, !dbg !21
%.0.i3 = select i1 %.not.i2, float %73, float %72, !dbg !21
%74 = fcmp oge float %.0.i3, 0x3FF00C1FC0000000, !dbg !21
br i1 %74, label %__nv_fabsf.exit1.i20, label %76, !dbg !21
; Large-magnitude path: polynomial variable is |arg|.
__nv_fabsf.exit1.i20: ; preds = %__nv_erff.exit
%75 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not1.i21 = icmp eq i32 %75, 0, !dbg !21
%.01.i22 = select i1 %.not1.i21, float %73, float %72, !dbg !21
br label %__internal_fmad.exit.i4, !dbg !21
; Small-magnitude path: polynomial variable is arg * arg.
76: ; preds = %__nv_erff.exit
%77 = fmul float %24, %24, !dbg !21
br label %__internal_fmad.exit.i4, !dbg !21
__internal_fmad.exit.i4: ; preds = %76, %__nv_fabsf.exit1.i20
; %78..%84 = coefficients for the taken path (%84 consumed first); %85 = t.
%78 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1.i20 ], [ 0x3FC06EBA60000000, %76 ], !dbg !21
%79 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1.i20 ], [ 0xBFD8127580000000, %76 ], !dbg !21
%80 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1.i20 ], [ 0x3FBCE315E0000000, %76 ], !dbg !21
%81 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1.i20 ], [ 0xBF9B837CE0000000, %76 ], !dbg !21
%82 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1.i20 ], [ 0x3F755ABD40000000, %76 ], !dbg !21
%83 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1.i20 ], [ 0xBF4AE9A400000000, %76 ], !dbg !21
%84 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1.i20 ], [ 0x3F163D2D40000000, %76 ], !dbg !21
%85 = phi float [ %.01.i22, %__nv_fabsf.exit1.i20 ], [ %77, %76 ], !dbg !21
; Six chained FMAs in nested (Horner) form, ending in %.07.i16.
%86 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not2.i5 = icmp eq i32 %86, 0, !dbg !21
%87 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %84, float %85, float %83) #4, !dbg !21
%88 = tail call float @llvm.nvvm.fma.rn.f(float %84, float %85, float %83) #4, !dbg !21
%.02.i6 = select i1 %.not2.i5, float %88, float %87, !dbg !21
%89 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not3.i7 = icmp eq i32 %89, 0, !dbg !21
%90 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02.i6, float %85, float %82) #4, !dbg !21
%91 = tail call float @llvm.nvvm.fma.rn.f(float %.02.i6, float %85, float %82) #4, !dbg !21
%.03.i8 = select i1 %.not3.i7, float %91, float %90, !dbg !21
%92 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not4.i9 = icmp eq i32 %92, 0, !dbg !21
%93 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03.i8, float %85, float %81) #4, !dbg !21
%94 = tail call float @llvm.nvvm.fma.rn.f(float %.03.i8, float %85, float %81) #4, !dbg !21
%.04.i10 = select i1 %.not4.i9, float %94, float %93, !dbg !21
%95 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not5.i11 = icmp eq i32 %95, 0, !dbg !21
%96 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04.i10, float %85, float %80) #4, !dbg !21
%97 = tail call float @llvm.nvvm.fma.rn.f(float %.04.i10, float %85, float %80) #4, !dbg !21
%.05.i12 = select i1 %.not5.i11, float %97, float %96, !dbg !21
%98 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not6.i13 = icmp eq i32 %98, 0, !dbg !21
%99 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05.i12, float %85, float %79) #4, !dbg !21
%100 = tail call float @llvm.nvvm.fma.rn.f(float %.05.i12, float %85, float %79) #4, !dbg !21
%.06.i14 = select i1 %.not6.i13, float %100, float %99, !dbg !21
%101 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not7.i15 = icmp eq i32 %101, 0, !dbg !21
%102 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06.i14, float %85, float %78) #4, !dbg !21
%103 = tail call float @llvm.nvvm.fma.rn.f(float %.06.i14, float %85, float %78) #4, !dbg !21
%.07.i16 = select i1 %.not7.i15, float %103, float %102, !dbg !21
; s = -t on the large-magnitude path, the original argument otherwise;
; the last FMA forms p*s + s.
%104 = fneg float %85, !dbg !21
%105 = select i1 %74, float %104, float %24, !dbg !21
%106 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4, !dbg !21
%.not8.i17 = icmp eq i32 %106, 0, !dbg !21
%107 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07.i16, float %105, float %105) #4, !dbg !21
%108 = tail call float @llvm.nvvm.fma.rn.f(float %.07.i16, float %105, float %105) #4, !dbg !21
%.08.i18 = select i1 %.not8.i17, float %108, float %107, !dbg !21
br i1 %74, label %109, label %__nv_erff.exit23, !dbg !21
; Large-magnitude reconstruction: 1 - 2^(p*s + s) with the argument's sign bit.
109: ; preds = %__internal_fmad.exit.i4
%110 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08.i18) #4, !dbg !21
%111 = fsub float 1.000000e+00, %110, !dbg !21
%112 = bitcast float %111 to i32, !dbg !21
%113 = bitcast float %24 to i32, !dbg !21
%114 = and i32 %113, -2147483648, !dbg !21
%115 = or i32 %114, %112, !dbg !21
%116 = bitcast i32 %115 to float, !dbg !21
br label %__nv_erff.exit23, !dbg !21
__nv_erff.exit23: ; preds = %__internal_fmad.exit.i4, %109
; %r.0.i19 = erf result for the second element.
%r.0.i19 = phi float [ %116, %109 ], [ %.08.i18, %__internal_fmad.exit.i4 ], !dbg !21
; --- Combine: 0.5 * (erf + 1) ---
%117 = fadd float %r.0.i, 1.000000e+00, !dbg !22
%118 = fadd float %r.0.i19, 1.000000e+00, !dbg !22
%119 = fmul float %117, 5.000000e-01, !dbg !23
%120 = fmul float %118, 5.000000e-01, !dbg !23
; --- 2^(-0.5 * x*x * 1.442695) via the approximate PTX ex2 instruction ---
%121 = fmul float %21, %21, !dbg !24
%122 = fmul float %22, %22, !dbg !24
%123 = fmul float %121, -5.000000e-01, !dbg !25
%124 = fmul float %122, -5.000000e-01, !dbg !25
%125 = fmul float %123, 0x3FF7154760000000, !dbg !26
%126 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %125) #4, !dbg !26
%127 = fmul float %124, 0x3FF7154760000000, !dbg !26
%128 = tail call float asm "ex2.approx.f32 $0, $1;", "=f,f"(float %127) #4, !dbg !26
; Scale by approx. 0.3989423, multiply by x, and add to the erf term.
%129 = fmul float %126, 0x3FD9884540000000, !dbg !27
%130 = fmul float %128, 0x3FD9884540000000, !dbg !27
%131 = fmul float %21, %129, !dbg !28
%132 = fmul float %22, %130, !dbg !28
%133 = fadd float %119, %131, !dbg !29
%134 = fadd float %120, %132, !dbg !29
; Multiply by the values loaded from %0.
%135 = fmul float %15, %133, !dbg !30
%136 = fmul float %16, %134, !dbg !30
; --- Narrow to bf16 (cvt.rn), pack both results into one i32
;     (element 0 in the low half), and store over the word read from %0 ---
%137 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %135) #4, !dbg !31
%138 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %136) #4, !dbg !31
%139 = insertelement <2 x i16> undef, i16 %137, i64 0, !dbg !31
%140 = insertelement <2 x i16> %139, i16 %138, i64 1, !dbg !31
%141 = bitcast <2 x i16> %140 to i32, !dbg !31
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %141, ptr addrspace(1) %11, i1 true) #4, !dbg !31
ret void, !dbg !32
}
; NVVM intrinsic that reads the PTX special register tid.x. The kernel
; @triton__0d1d2de calls it once to derive its per-thread element offset.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
; float @__nv_erff(float %a) - single-precision erf (per the symbol name).
; The kernel @triton__0d1d2de contains two inlined copies of this body; no
; direct call to this definition is visible in this module.
; NOTE(review): presumably linked in from CUDA libdevice - confirm.
;
; Structure:
;   * |a| >= 0x3FF00C1FC0000000 (approx. 1.00296):
;       t = |a|, s = -t, result = 1 - 2^(p(t)*s + s) with a's sign bit OR-ed in.
;   * otherwise:
;       t = a*a, s = a,  result = p(t)*s + s.
;   p(t) is a degree-6 polynomial evaluated with six chained FMAs; each path
;   has its own set of seven coefficients.
; Every fabs/fma step calls both the .ftz and the non-.ftz intrinsic and picks
; the non-.ftz result when @__nvvm_reflect("__CUDA_FTZ") returns 0.
; Function Attrs: alwaysinline nounwind
define float @__nv_erff(float %a) local_unnamed_addr #1 {
__nv_fabsf.exit:
; |a|, with the .ftz / non-.ftz variant chosen by the reflect query.
%0 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not = icmp eq i32 %0, 0
%1 = tail call float @llvm.nvvm.fabs.ftz.f(float %a) #4
%2 = tail call float @llvm.nvvm.fabs.f(float %a) #4
%.0 = select i1 %.not, float %2, float %1
; %3 selects the large-magnitude path.
%3 = fcmp oge float %.0, 0x3FF00C1FC0000000
br i1 %3, label %__nv_fabsf.exit1, label %5
; Large-magnitude path: polynomial variable is |a|.
__nv_fabsf.exit1: ; preds = %__nv_fabsf.exit
%4 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not1 = icmp eq i32 %4, 0
%.01 = select i1 %.not1, float %2, float %1
br label %__internal_fmad.exit
; Small-magnitude path: polynomial variable is a * a.
5: ; preds = %__nv_fabsf.exit
%6 = fmul float %a, %a
br label %__internal_fmad.exit
__internal_fmad.exit: ; preds = %5, %__nv_fabsf.exit1
; %7..%13 = coefficients for the taken path (%13 consumed first, %7 last);
; %14 = polynomial variable t.
%7 = phi float [ 0x3FE41B0840000000, %__nv_fabsf.exit1 ], [ 0x3FC06EBA60000000, %5 ]
%8 = phi float [ 0x3FED526FC0000000, %__nv_fabsf.exit1 ], [ 0xBFD8127580000000, %5 ]
%9 = phi float [ 0x3FC39F20C0000000, %__nv_fabsf.exit1 ], [ 0x3FBCE315E0000000, %5 ]
%10 = phi float [ 0xBFA1902C40000000, %__nv_fabsf.exit1 ], [ 0xBF9B837CE0000000, %5 ]
%11 = phi float [ 0x3F75908160000000, %__nv_fabsf.exit1 ], [ 0x3F755ABD40000000, %5 ]
%12 = phi float [ 0xBF3EAC1720000000, %__nv_fabsf.exit1 ], [ 0xBF4AE9A400000000, %5 ]
%13 = phi float [ 0x3EF1394780000000, %__nv_fabsf.exit1 ], [ 0x3F163D2D40000000, %5 ]
%14 = phi float [ %.01, %__nv_fabsf.exit1 ], [ %6, %5 ]
; Nested (Horner) evaluation:
;   p = (((((%13*t + %12)*t + %11)*t + %10)*t + %9)*t + %8)*t + %7
%15 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not2 = icmp eq i32 %15, 0
%16 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %13, float %14, float %12) #4
%17 = tail call float @llvm.nvvm.fma.rn.f(float %13, float %14, float %12) #4
%.02 = select i1 %.not2, float %17, float %16
%18 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not3 = icmp eq i32 %18, 0
%19 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.02, float %14, float %11) #4
%20 = tail call float @llvm.nvvm.fma.rn.f(float %.02, float %14, float %11) #4
%.03 = select i1 %.not3, float %20, float %19
%21 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not4 = icmp eq i32 %21, 0
%22 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.03, float %14, float %10) #4
%23 = tail call float @llvm.nvvm.fma.rn.f(float %.03, float %14, float %10) #4
%.04 = select i1 %.not4, float %23, float %22
%24 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not5 = icmp eq i32 %24, 0
%25 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.04, float %14, float %9) #4
%26 = tail call float @llvm.nvvm.fma.rn.f(float %.04, float %14, float %9) #4
%.05 = select i1 %.not5, float %26, float %25
%27 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not6 = icmp eq i32 %27, 0
%28 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.05, float %14, float %8) #4
%29 = tail call float @llvm.nvvm.fma.rn.f(float %.05, float %14, float %8) #4
%.06 = select i1 %.not6, float %29, float %28
%30 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not7 = icmp eq i32 %30, 0
%31 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.06, float %14, float %7) #4
%32 = tail call float @llvm.nvvm.fma.rn.f(float %.06, float %14, float %7) #4
%.07 = select i1 %.not7, float %32, float %31
; s = -t on the large-magnitude path, a otherwise; the last FMA forms p*s + s.
%33 = fneg float %14
%34 = select i1 %3, float %33, float %a
%35 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #4
%.not8 = icmp eq i32 %35, 0
%36 = tail call float @llvm.nvvm.fma.rn.ftz.f(float %.07, float %34, float %34) #4
%37 = tail call float @llvm.nvvm.fma.rn.f(float %.07, float %34, float %34) #4
%.08 = select i1 %.not8, float %37, float %36
br i1 %3, label %38, label %46
; Large-magnitude reconstruction: 1 - 2^(p*s + s), then a's sign bit
; (mask -2147483648 = 0x80000000) is OR-ed into the result bits.
38: ; preds = %__internal_fmad.exit
%39 = tail call float @llvm.nvvm.ex2.approx.ftz.f(float %.08) #4
%40 = fsub float 1.000000e+00, %39
%41 = bitcast float %40 to i32
%42 = bitcast float %a to i32
%43 = and i32 %42, -2147483648
%44 = or i32 %43, %41
%45 = bitcast i32 %44 to float
br label %46
46: ; preds = %38, %__internal_fmad.exit
; Small-magnitude path returns p*s + s directly.
%r.0 = phi float [ %45, %38 ], [ %.08, %__internal_fmad.exit ]
ret float %r.0
}
; Configuration query; in this module it is only ever called with @.str
; ("__CUDA_FTZ"), and its result is compared against 0 to choose between the
; .ftz and non-.ftz intrinsic results.
; NOTE(review): presumably folded to a constant by LLVM's NVVMReflect pass
; using the "nvvm-reflect-ftz" module flag - confirm against the NVPTX docs.
declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #2
; Absolute value, .ftz variant.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.nvvm.fabs.ftz.f(float) #0
; Absolute value, non-.ftz variant.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.nvvm.fabs.f(float) #0
; Fused multiply-add (.rn), .ftz variant.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float) #0
; Fused multiply-add (.rn), non-.ftz variant.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.nvvm.fma.rn.f(float, float, float) #0
; Approximate base-2 exponential (.ftz); used only on the large-magnitude erf
; path. Unlike the declarations above it uses attribute group #3, which lacks
; `speculatable`.
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare float @llvm.nvvm.ex2.approx.ftz.f(float) #3
; #0: tid.x, fabs and fma intrinsic declarations.
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; #1: the @__nv_erff definition (alwaysinline).
attributes #1 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #2: the @__nvvm_reflect declaration.
attributes #2 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #3: the @llvm.nvvm.ex2.approx.ftz.f declaration (same as #0 minus speculatable).
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
; #4: call-site attribute used on the inline-asm, reflect and intrinsic calls.
attributes #4 = { nounwind }
; --- Named metadata ---
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}
; The kernel and maxntidx annotations (!4, !5) are each listed twice here.
!nvvm.annotations = !{!4, !5, !5, !4}
!llvm.ident = !{!6}
; --- Module flags ---
!0 = !{i32 2, !"Debug Info Version", i32 3}
; "nvvm-reflect-ftz" is set to 1.
; NOTE(review): presumably this makes the __CUDA_FTZ reflect queries resolve to
; 1, i.e. the .ftz intrinsic variants get selected - confirm against NVPTX docs.
!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
; --- Debug info: compile unit and source file ---
!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!3 = !DIFile(filename: "c5jxaguxho3nhrlt5vcinnz5fevodumlpwn4wyb2vx3xrveicerl.py", directory: "/tmp/torchinductor_root/5j")
; --- NVVM annotations: mark @triton__0d1d2de as a kernel, max 256 threads in x ---
!4 = !{ptr @triton__0d1d2de, !"kernel", i32 1}
!5 = !{ptr @triton__0d1d2de, !"maxntidx", i32 256}
; NOTE(review): this ident string presumably comes from a linked-in bitcode
; library rather than from the producer named in !2 - confirm.
!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
; --- Subprogram for the kernel; declared at line 18 of the file in !3 ---
!7 = distinct !DISubprogram(name: "triton__0d1d2de", linkageName: "triton__0d1d2de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
!9 = !{}
; --- Source locations (line/column in the file in !3) attached to kernel
;     instructions. !21 covers both inlined erf bodies. ---
!10 = !DILocation(line: 21, column: 36, scope: !7)
!11 = !DILocation(line: 20, column: 28, scope: !7)
!12 = !DILocation(line: 20, column: 33, scope: !7)
!13 = !DILocation(line: 21, column: 23, scope: !7)
!14 = !DILocation(line: 24, column: 34, scope: !7)
!15 = !DILocation(line: 24, column: 39, scope: !7)
!16 = !DILocation(line: 24, column: 48, scope: !7)
!17 = !DILocation(line: 25, column: 30, scope: !7)
!18 = !DILocation(line: 25, column: 35, scope: !7)
!19 = !DILocation(line: 25, column: 44, scope: !7)
!20 = !DILocation(line: 29, column: 18, scope: !7)
!21 = !DILocation(line: 30, column: 23, scope: !7)
!22 = !DILocation(line: 32, column: 18, scope: !7)
!23 = !DILocation(line: 34, column: 19, scope: !7)
!24 = !DILocation(line: 35, column: 19, scope: !7)
!25 = !DILocation(line: 37, column: 20, scope: !7)
!26 = !DILocation(line: 38, column: 19, scope: !7)
!27 = !DILocation(line: 40, column: 20, scope: !7)
!28 = !DILocation(line: 41, column: 19, scope: !7)
!29 = !DILocation(line: 42, column: 20, scope: !7)
!30 = !DILocation(line: 43, column: 19, scope: !7)
!31 = !DILocation(line: 45, column: 40, scope: !7)
!32 = !DILocation(line: 45, column: 4, scope: !7)