0-hero's picture
Add files using upload-large-folder tool
4bf7f8a verified
raw
history blame
21.3 kB
; ModuleID = 'LLVMDialectModule'
source_filename = "LLVMDialectModule"
; Strings handed to @__assertfail (function name, file name, message) by the
; kernel's index bounds check. They are passed as bare pointers with no length,
; so each one carries an explicit NUL terminator; without it the consumer would
; read past the end of the array into whatever global follows.
@assertFunc_0 = internal constant [26 x i8] c"_call_with_frames_removed\00"
@assertFile_0 = internal constant [39 x i8] c"<frozen importlib._bootstrap_external>\00"
@assertMessage_0 = internal constant [39 x i8] c"index out of bounds: 0 <= tmp3 < 50257\00"
; Shared-memory (addrspace(3)) scratch area, declared with zero length and
; defined externally; the kernel uses its first floats for block reductions.
@global_smem = external addrspace(3) global [0 x i8]
; Key passed to @__nvvm_reflect to select the flush-to-zero rsqrt variant.
@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1
; Assertion-failure handler called from the kernel as
; (message, file, line, function, i64 1).
; NOTE(review): the final i64 is presumably the character size - confirm
; against the CUDA device runtime.
declare void @__assertfail(ptr, ptr, i32, ptr, i64) local_unnamed_addr
; Kernel @triton__0d1d2d3d4d5d6d7d8de9de.
;
; Each thread block handles one row of 256 floats (row = %ctaid.x); each thread
; covers 4 consecutive columns starting at col = (tid.x & 63) * 4.
; For its row the visible code:
;   1. loads an i64 index from %1[row], wraps negatives by +50257 and calls
;      @__assertfail when the wrapped value is not in [0, 50257);
;   2. loads 4 floats from %3[(row srem 512) * 256 + col] and 4 floats from
;      %2[index * 256 + col] and adds them element-wise;
;   3. reduces those sums across the block to a mean (sum / 256), reduces the
;      squared deviations to sum / 256, adds a small epsilon and takes an
;      approximate reciprocal square root;
;   4. stores the raw sums to %5[row * 256 + col], the rsqrt to %0[row], the
;      mean to %6[row], and (x - mean) * rsqrt * %4[col] converted to bf16 to
;      %7[row * 256 + col].
; Arguments %8 and %9 are not referenced anywhere in the body.
; NOTE(review): this has the shape of an embedding gather + add + layer norm;
; confirm against the generating Python source before relying on that reading.
define void @triton__0d1d2d3d4d5d6d7d8de9de(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, ptr addrspace(1) %5, ptr addrspace(1) %6, ptr addrspace(1) %7, i32 %8, i32 %9) local_unnamed_addr !dbg !7 {
; --- Thread / block coordinates -------------------------------------------
; %12 = (tid.x & 63) * 4 is this thread's first column within the row.
%11 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
%urem = and i32 %11, 63, !dbg !10
%12 = shl nuw nsw i32 %urem, 2, !dbg !10
; %13 = block index (%ctaid.x), used as the row number; %14 = row srem 512.
%13 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #6, !dbg !11
%14 = srem i32 %13, 512, !dbg !12
%15 = sext i32 %13 to i64, !dbg !13
; --- Load the per-row i64 index from %1[row] ------------------------------
; The same predicated 64-bit load is issued five times from the same address.
; Only %17 (address computation) and %21 (bounds check) are used later;
; %18, %19 and %20 have no uses in this function.
%16 = getelementptr i64, ptr addrspace(1) %1, i64 %15, !dbg !13
%17 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %16, i1 true) #6, !dbg !14
%18 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %16, i1 true) #6, !dbg !14
%19 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %16, i1 true) #6, !dbg !14
%20 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %16, i1 true) #6, !dbg !14
%21 = tail call i64 asm sideeffect "mov.u64 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b64 { $0 }, [ $1 + 0 ];", "=l,l,b"(ptr addrspace(1) %16, i1 true) #6, !dbg !14
; --- Load 4 floats from %3[(row srem 512) * 256 + col] --------------------
; The asm zero-initialises the four outputs, then performs one 128-bit load.
%22 = shl nsw i32 %14, 8, !dbg !15
%23 = or i32 %22, %12, !dbg !16
%24 = sext i32 %23 to i64, !dbg !17
%25 = getelementptr float, ptr addrspace(1) %3, i64 %24, !dbg !17
%26 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_last.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %25, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !18
; --- Load 4 floats from %4[col] (used as the final per-column multiplier) --
%27 = zext nneg i32 %12 to i64, !dbg !19
%28 = getelementptr float, ptr addrspace(1) %4, i64 %27, !dbg !19
%29 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_last.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %28, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !20
; --- Bounds check on the index --------------------------------------------
; Negative indices are wrapped by adding 50257; the unsigned compare then
; rejects anything outside [0, 50256] in a single test.
%30 = add i64 %21, 50257, !dbg !21
%31 = icmp slt i64 %21, 0, !dbg !22
%32 = select i1 %31, i64 %30, i64 %21, !dbg !23
%33 = icmp ugt i64 %32, 50256, !dbg !24
br i1 %33, label %34, label %35, !dbg !25
34: ; preds = %10
; Out-of-range index: report it. Control then falls through to the main path
; (the call is not followed by unreachable in this IR).
tail call void @__assertfail(ptr nonnull @assertMessage_0, ptr nonnull @assertFile_0, i32 883, ptr nonnull @assertFunc_0, i64 1), !dbg !25
br label %35, !dbg !25
35: ; preds = %34, %10
%36 = icmp slt i64 %17, 0, !dbg !22
; Unpack the four 32-bit words loaded from %3 (note the 3,2,1,0 order).
%37 = extractvalue { i32, i32, i32, i32 } %26, 3, !dbg !18
%38 = extractvalue { i32, i32, i32, i32 } %26, 2, !dbg !18
%39 = extractvalue { i32, i32, i32, i32 } %26, 1, !dbg !18
%40 = extractvalue { i32, i32, i32, i32 } %26, 0, !dbg !18
; %42 = bit 5 of tid.x (warp index, 0 or 1); %43 = lane within the warp.
%41 = lshr i32 %11, 5, !dbg !10
%42 = and i32 %41, 1, !dbg !10
%43 = and i32 %11, 31, !dbg !10
; --- Gather 4 floats from %2[index * 256 + col] ---------------------------
; 12865792 = 50257 * 256, i.e. the negative-index wrap applied after the
; multiplication by the row width.
%44 = shl i64 %17, 8, !dbg !26
%45 = add i64 %44, 12865792, !dbg !26
%46 = select i1 %36, i64 %45, i64 %44, !dbg !26
%47 = or i64 %46, %27, !dbg !27
%48 = getelementptr float, ptr addrspace(1) %2, i64 %47, !dbg !28
%49 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %48, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !29
%50 = extractvalue { i32, i32, i32, i32 } %49, 0, !dbg !29
%51 = extractvalue { i32, i32, i32, i32 } %49, 1, !dbg !29
%52 = extractvalue { i32, i32, i32, i32 } %49, 2, !dbg !29
%53 = extractvalue { i32, i32, i32, i32 } %49, 3, !dbg !29
; --- Element-wise add of the two loaded vectors ---------------------------
; %60 = { col0, col1 } sums.
%54 = insertelement <2 x i32> poison, i32 %40, i64 0, !dbg !18
%55 = insertelement <2 x i32> %54, i32 %39, i64 1, !dbg !18
%56 = bitcast <2 x i32> %55 to <2 x float>, !dbg !18
%57 = insertelement <2 x i32> poison, i32 %50, i64 0, !dbg !29
%58 = insertelement <2 x i32> %57, i32 %51, i64 1, !dbg !29
%59 = bitcast <2 x i32> %58 to <2 x float>, !dbg !29
%60 = fadd <2 x float> %56, %59, !dbg !30
; %67 = { col3, col2 } sums: lane 0 holds component 3, lane 1 component 2.
%61 = insertelement <2 x i32> poison, i32 %37, i64 0, !dbg !18
%62 = insertelement <2 x i32> %61, i32 %38, i64 1, !dbg !18
%63 = bitcast <2 x i32> %62 to <2 x float>, !dbg !18
%64 = insertelement <2 x i32> poison, i32 %53, i64 0, !dbg !29
%65 = insertelement <2 x i32> %64, i32 %52, i64 1, !dbg !29
%66 = bitcast <2 x i32> %65 to <2 x float>, !dbg !29
%67 = fadd <2 x float> %63, %66, !dbg !30
; Per-thread scalars: %68 = col0, %69 = col1, %71 = col2, %73 = col3.
; %74 = sum of this thread's four values.
%68 = extractelement <2 x float> %60, i64 0, !dbg !31
%69 = extractelement <2 x float> %60, i64 1, !dbg !31
%70 = fadd float %68, %69, !dbg !31
%71 = extractelement <2 x float> %67, i64 1, !dbg !31
%72 = fadd float %71, %70, !dbg !31
%73 = extractelement <2 x float> %67, i64 0, !dbg !31
%74 = fadd float %73, %72, !dbg !31
; --- First reduction: sum over the row ------------------------------------
; Butterfly shuffles with offsets 16, 8, 4, 2, 1 (full mask -1) add the
; per-thread sums across the 32 lanes of each warp.
%75 = bitcast float %74 to i32, !dbg !37
%76 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %75, i32 16, i32 31), !dbg !37
%77 = bitcast i32 %76 to float, !dbg !37
%78 = fadd float %74, %77, !dbg !31
%79 = bitcast float %78 to i32, !dbg !37
%80 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %79, i32 8, i32 31), !dbg !37
%81 = bitcast i32 %80 to float, !dbg !37
%82 = fadd float %78, %81, !dbg !31
%83 = bitcast float %82 to i32, !dbg !37
%84 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %83, i32 4, i32 31), !dbg !37
%85 = bitcast i32 %84 to float, !dbg !37
%86 = fadd float %82, %85, !dbg !31
%87 = bitcast float %86 to i32, !dbg !37
%88 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %87, i32 2, i32 31), !dbg !37
%89 = bitcast i32 %88 to float, !dbg !37
%90 = fadd float %86, %89, !dbg !31
%91 = bitcast float %90 to i32, !dbg !37
%92 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %91, i32 1, i32 31), !dbg !37
%93 = bitcast i32 %92 to float, !dbg !37
%94 = fadd float %90, %93, !dbg !31
; Lane 0 of each warp stores its warp total to @global_smem[warp index].
%95 = icmp eq i32 %43, 0, !dbg !37
%96 = zext nneg i32 %42 to i64, !dbg !37
%97 = getelementptr float, ptr addrspace(3) @global_smem, i64 %96, !dbg !37
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %97, float %94, i1 %95) #6, !dbg !37
tail call void @llvm.nvvm.barrier0(), !dbg !37
; Threads with tid.x < 2 reload the two warp totals and combine them with one
; more xor-1 shuffle; the thread with tid.x < 2 and an even tid.x writes the
; block total back to @global_smem[tid.x].
%98 = icmp slt i32 %11, 2, !dbg !37
%99 = sext i32 %11 to i64, !dbg !37
%100 = getelementptr float, ptr addrspace(3) @global_smem, i64 %99, !dbg !37
%101 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %100, i1 %98) #6, !dbg !37
%102 = bitcast float %101 to i32, !dbg !37
%103 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %102, i32 1, i32 31), !dbg !37
%104 = bitcast i32 %103 to float, !dbg !37
%105 = fadd float %101, %104, !dbg !31
%106 = and i32 %11, 1, !dbg !37
%107 = icmp eq i32 %106, 0, !dbg !37
%108 = and i1 %98, %107, !dbg !37
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %100, float %105, i1 %108) #6, !dbg !37
tail call void @llvm.nvvm.barrier0(), !dbg !37
; Every thread reads the block-wide sum from @global_smem[0].
%109 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !37
; Adds 0.0 to the sum. NOTE(review): the debug location points into
; triton_helpers.py, so this is presumably left over from an inlined helper.
%110 = fadd float %109, 0.000000e+00, !dbg !39
; %111 = mean = row sum / 256.
%111 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %110, float 2.560000e+02) #6, !dbg !43
; --- Deviations from the mean and their squares ---------------------------
%112 = fsub float %68, %111, !dbg !44
%113 = fsub float %69, %111, !dbg !44
%114 = fsub float %71, %111, !dbg !44
%115 = fsub float %73, %111, !dbg !44
%116 = fmul float %112, %112, !dbg !45
%117 = fmul float %113, %113, !dbg !45
%118 = fmul float %114, %114, !dbg !45
%119 = fmul float %115, %115, !dbg !45
; Barrier before the shared scratch slots are written again below.
tail call void @llvm.nvvm.barrier0(), !dbg !46
; --- Second reduction: sum of squared deviations --------------------------
; Same structure as the first reduction (warp shuffles, then shared memory).
%120 = fadd float %116, %117, !dbg !48
%121 = fadd float %118, %120, !dbg !48
%122 = fadd float %119, %121, !dbg !48
%123 = bitcast float %122 to i32, !dbg !46
%124 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %123, i32 16, i32 31), !dbg !46
%125 = bitcast i32 %124 to float, !dbg !46
%126 = fadd float %122, %125, !dbg !48
%127 = bitcast float %126 to i32, !dbg !46
%128 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %127, i32 8, i32 31), !dbg !46
%129 = bitcast i32 %128 to float, !dbg !46
%130 = fadd float %126, %129, !dbg !48
%131 = bitcast float %130 to i32, !dbg !46
%132 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %131, i32 4, i32 31), !dbg !46
%133 = bitcast i32 %132 to float, !dbg !46
%134 = fadd float %130, %133, !dbg !48
%135 = bitcast float %134 to i32, !dbg !46
%136 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %135, i32 2, i32 31), !dbg !46
%137 = bitcast i32 %136 to float, !dbg !46
%138 = fadd float %134, %137, !dbg !48
%139 = bitcast float %138 to i32, !dbg !46
%140 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %139, i32 1, i32 31), !dbg !46
%141 = bitcast i32 %140 to float, !dbg !46
%142 = fadd float %138, %141, !dbg !48
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %97, float %142, i1 %95) #6, !dbg !46
tail call void @llvm.nvvm.barrier0(), !dbg !46
%143 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %100, i1 %98) #6, !dbg !46
%144 = bitcast float %143 to i32, !dbg !46
%145 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %144, i32 1, i32 31), !dbg !46
%146 = bitcast i32 %145 to float, !dbg !46
%147 = fadd float %143, %146, !dbg !48
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %100, float %147, i1 %108) #6, !dbg !46
tail call void @llvm.nvvm.barrier0(), !dbg !46
%148 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !46
%149 = fadd float %148, 0.000000e+00, !dbg !51
; %150 = sum of squared deviations / 256; %151 adds a small constant
; (0x3EE4F8B580000000, approximately 1e-5) before the reciprocal square root.
%150 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %149, float 2.560000e+02) #6, !dbg !53
%151 = fadd float %150, 0x3EE4F8B580000000, !dbg !54
; --- Approximate rsqrt (same body as @__nv_rsqrtf, inlined) ---------------
; @__nvvm_reflect("__CUDA_FTZ") != 0 selects the flush-to-zero intrinsic.
%152 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !55
%.not.i = icmp eq i32 %152, 0, !dbg !55
br i1 %.not.i, label %155, label %153, !dbg !55
153: ; preds = %35
%154 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %151), !dbg !55
br label %__nv_rsqrtf.exit, !dbg !55
155: ; preds = %35
%156 = tail call float @llvm.nvvm.rsqrt.approx.f(float %151), !dbg !55
br label %__nv_rsqrtf.exit, !dbg !55
__nv_rsqrtf.exit: ; preds = %153, %155
; %.0.i = rsqrt(sum of squared deviations / 256 + epsilon).
%.0.i = phi float [ %154, %153 ], [ %156, %155 ], !dbg !55
; Reinterpret the four words loaded from %4[col] as floats.
%157 = extractvalue { i32, i32, i32, i32 } %29, 0, !dbg !20
%158 = bitcast i32 %157 to float, !dbg !20
%159 = extractvalue { i32, i32, i32, i32 } %29, 1, !dbg !20
%160 = bitcast i32 %159 to float, !dbg !20
%161 = extractvalue { i32, i32, i32, i32 } %29, 2, !dbg !20
%162 = bitcast i32 %161 to float, !dbg !20
%163 = extractvalue { i32, i32, i32, i32 } %29, 3, !dbg !20
%164 = bitcast i32 %163 to float, !dbg !20
; (x - mean) * rsqrt, then multiplied by the matching value from %4.
%165 = fmul float %112, %.0.i, !dbg !56
%166 = fmul float %113, %.0.i, !dbg !56
%167 = fmul float %114, %.0.i, !dbg !56
%168 = fmul float %115, %.0.i, !dbg !56
%169 = fmul float %165, %158, !dbg !57
%170 = fmul float %166, %160, !dbg !57
%171 = fmul float %167, %162, !dbg !57
%172 = fmul float %168, %164, !dbg !57
; --- Store the raw (pre-normalisation) sums to %5[row * 256 + col] --------
%173 = shl i32 %13, 8, !dbg !58
%174 = or i32 %173, %12, !dbg !59
%175 = sext i32 %174 to i64, !dbg !60
%176 = getelementptr float, ptr addrspace(1) %5, i64 %175, !dbg !60
%177 = bitcast float %68 to i32, !dbg !61
%178 = bitcast float %69 to i32, !dbg !61
%179 = bitcast float %71 to i32, !dbg !61
%180 = bitcast float %73 to i32, !dbg !61
tail call void asm sideeffect "@$5 st.global.v4.b32 [ $4 + 0 ], { $0, $1, $2, $3 };", "r,r,r,r,l,b"(i32 %177, i32 %178, i32 %179, i32 %180, ptr addrspace(1) %176, i1 true) #6, !dbg !61
tail call void @llvm.nvvm.barrier0(), !dbg !62
; --- Store the rsqrt to %0[row]; only the thread with (tid.x & 63) == 0 ----
%181 = getelementptr float, ptr addrspace(1) %0, i64 %15, !dbg !63
%182 = icmp eq i32 %urem, 0, !dbg !64
%183 = bitcast float %.0.i to i32, !dbg !64
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %183, ptr addrspace(1) %181, i1 %182) #6, !dbg !64
; --- Convert the scaled values to bf16 and store to %7[row * 256 + col] ---
; %7 is indexed with a 16-bit element stride; the four bf16 values are packed
; into two 32-bit words and written with one 64-bit vector store.
%184 = getelementptr i16, ptr addrspace(1) %7, i64 %175, !dbg !65
%185 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %169) #6, !dbg !66
%186 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %170) #6, !dbg !66
%187 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %171) #6, !dbg !66
%188 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %172) #6, !dbg !66
%189 = insertelement <2 x i16> undef, i16 %185, i64 0, !dbg !66
%190 = insertelement <2 x i16> %189, i16 %186, i64 1, !dbg !66
%191 = bitcast <2 x i16> %190 to i32, !dbg !66
%192 = insertelement <2 x i16> undef, i16 %187, i64 0, !dbg !66
%193 = insertelement <2 x i16> %192, i16 %188, i64 1, !dbg !66
%194 = bitcast <2 x i16> %193 to i32, !dbg !66
tail call void asm sideeffect "@$3 st.global.v2.b32 [ $2 + 0 ], { $0, $1 };", "r,r,l,b"(i32 %191, i32 %194, ptr addrspace(1) %184, i1 true) #6, !dbg !66
; --- Store the mean to %6[row]; only the thread with (tid.x & 63) == 0 ----
%195 = getelementptr float, ptr addrspace(1) %6, i64 %15, !dbg !67
%196 = bitcast float %111 to i32, !dbg !68
tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %196, ptr addrspace(1) %195, i1 %182) #6, !dbg !68
ret void, !dbg !69
}
; NVVM intrinsics used by the kernel.
; Reads the PTX special register %tid.x (thread index within the block).
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
; Butterfly (xor) warp shuffle; the kernel calls it as
; (member mask, value, lane xor offset, clamp) to build its reductions.
; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #1
; Block-wide barrier, used between the shared-memory stores and loads of the
; reductions.
; Function Attrs: convergent nocallback nounwind
declare void @llvm.nvvm.barrier0() #2
; Function Attrs: alwaysinline nounwind
; Approximate reciprocal square root of %x.
; Queries @__nvvm_reflect with the "__CUDA_FTZ" key (@.str): a non-zero answer
; selects the flush-to-zero intrinsic, zero selects the plain approximate one.
define float @__nv_rsqrtf(float %x) local_unnamed_addr #3 {
entry:
  %ftz.mode = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6
  %ftz.enabled = icmp ne i32 %ftz.mode, 0
  br i1 %ftz.enabled, label %rsqrt.ftz, label %rsqrt.noftz

rsqrt.ftz:                                        ; preds = %entry
  %res.ftz = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %x)
  br label %done

rsqrt.noftz:                                      ; preds = %entry
  %res.noftz = tail call float @llvm.nvvm.rsqrt.approx.f(float %x)
  br label %done

done:                                             ; preds = %rsqrt.noftz, %rsqrt.ftz
  %res = phi float [ %res.noftz, %rsqrt.noftz ], [ %res.ftz, %rsqrt.ftz ]
  ret float %res
}
; Compile-time query hook; this module calls it with the "__CUDA_FTZ" key to
; choose between the ftz and non-ftz rsqrt intrinsics.
declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #4
; Approximate reciprocal square root, flush-to-zero variant.
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare float @llvm.nvvm.rsqrt.approx.ftz.f(float) #5
; Approximate reciprocal square root, non-ftz variant.
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare float @llvm.nvvm.rsqrt.approx.f(float) #5
; Attribute groups referenced above by number:
;   #0 tid.x read, #1 warp shuffle, #2 barrier, #3 @__nv_rsqrtf definition,
;   #4 @__nvvm_reflect declaration, #5 rsqrt intrinsics,
;   #6 call sites of inline asm and @__nvvm_reflect.
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
attributes #2 = { convergent nocallback nounwind }
attributes #3 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
attributes #6 = { nounwind }
; Module-level named metadata.
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}
; The annotation list references !4 and !5 twice each (!4, !5, !5, !4).
!nvvm.annotations = !{!4, !5, !5, !4}
!llvm.ident = !{!6}
!0 = !{i32 2, !"Debug Info Version", i32 3}
; NOTE(review): presumably this flag is what makes @__nvvm_reflect("__CUDA_FTZ")
; resolve to 1 during NVPTX lowering - confirm against the NVPTX usage guide.
!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
; Compile unit and source file of the generated kernel.
!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!3 = !DIFile(filename: "cpdqiwgwgnzx7tsvbieui7kffx5dt43uhgvg7z7egekxcsybpv34.py", directory: "/tmp/torchinductor_root/pd")
; !4 tags the function as a kernel entry point; !5 sets maxntidx to 64, which
; matches the (tid.x & 63) column mapping used in the kernel body.
!4 = !{ptr @triton__0d1d2d3d4d5d6d7d8de9de, !"kernel", i32 1}
!5 = !{ptr @triton__0d1d2d3d4d5d6d7d8de9de, !"maxntidx", i32 64}
!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
; Debug subprogram for the kernel (declared at line 18 of file !3); its
; subroutine type carries an empty type list.
!7 = distinct !DISubprogram(name: "triton__0d1d2d3d4d5d6d7d8de9de", linkageName: "triton__0d1d2d3d4d5d6d7d8de9de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
!9 = !{}
; Debug locations referenced by the kernel's !dbg attachments.
; Entries scoped directly to !7 map instructions to lines of the generated
; kernel source file (!3).
!10 = !DILocation(line: 26, column: 26, scope: !7)
!11 = !DILocation(line: 23, column: 28, scope: !7)
!12 = !DILocation(line: 30, column: 18, scope: !7)
!13 = !DILocation(line: 31, column: 30, scope: !7)
!14 = !DILocation(line: 31, column: 35, scope: !7)
!15 = !DILocation(line: 32, column: 40, scope: !7)
!16 = !DILocation(line: 32, column: 36, scope: !7)
!17 = !DILocation(line: 32, column: 30, scope: !7)
!18 = !DILocation(line: 32, column: 46, scope: !7)
!19 = !DILocation(line: 33, column: 31, scope: !7)
!20 = !DILocation(line: 33, column: 36, scope: !7)
!21 = !DILocation(line: 34, column: 18, scope: !7)
!22 = !DILocation(line: 35, column: 18, scope: !7)
!23 = !DILocation(line: 36, column: 32, scope: !7)
!24 = !DILocation(line: 37, column: 36, scope: !7)
!25 = !DILocation(line: 37, column: 51, scope: !7)
!26 = !DILocation(line: 38, column: 40, scope: !7)
!27 = !DILocation(line: 38, column: 36, scope: !7)
!28 = !DILocation(line: 38, column: 30, scope: !7)
!29 = !DILocation(line: 38, column: 48, scope: !7)
!30 = !DILocation(line: 39, column: 18, scope: !7)
; First reduction: code from triton/language/standard.py (!33), inlined at
; kernel line 44.
!31 = !DILocation(line: 233, column: 15, scope: !32, inlinedAt: !35)
!32 = distinct !DILexicalBlockFile(scope: !34, file: !33, discriminator: 0)
!33 = !DIFile(filename: "standard.py", directory: "/usr/local/lib/python3.10/dist-packages/triton/language")
!34 = distinct !DILexicalBlockFile(scope: !7, file: !33, discriminator: 0)
!35 = !DILocation(line: 243, column: 36, scope: !32, inlinedAt: !36)
!36 = !DILocation(line: 44, column: 59, scope: !32)
!37 = !DILocation(line: 243, column: 36, scope: !34, inlinedAt: !38)
!38 = !DILocation(line: 44, column: 59, scope: !34)
; Helper from torch/_inductor/triton_helpers.py (!41), inlined at kernel
; line 44; attached to the fadd of 0.0 after the first reduction.
!39 = !DILocation(line: 8, column: 15, scope: !40, inlinedAt: !42)
!40 = distinct !DILexicalBlockFile(scope: !7, file: !41, discriminator: 0)
!41 = !DIFile(filename: "triton_helpers.py", directory: "/usr/local/lib/python3.10/dist-packages/torch/_inductor")
!42 = !DILocation(line: 44, column: 45, scope: !40)
!43 = !DILocation(line: 47, column: 20, scope: !7)
!44 = !DILocation(line: 48, column: 19, scope: !7)
!45 = !DILocation(line: 49, column: 20, scope: !7)
; Second reduction: the same standard.py / triton_helpers.py code, inlined at
; kernel line 52.
!46 = !DILocation(line: 243, column: 36, scope: !34, inlinedAt: !47)
!47 = !DILocation(line: 52, column: 59, scope: !34)
!48 = !DILocation(line: 233, column: 15, scope: !32, inlinedAt: !49)
!49 = !DILocation(line: 243, column: 36, scope: !32, inlinedAt: !50)
!50 = !DILocation(line: 52, column: 59, scope: !32)
!51 = !DILocation(line: 8, column: 15, scope: !40, inlinedAt: !52)
!52 = !DILocation(line: 52, column: 45, scope: !40)
; Remaining kernel-source locations (epsilon add, rsqrt, scaling, stores).
!53 = !DILocation(line: 54, column: 20, scope: !7)
!54 = !DILocation(line: 56, column: 20, scope: !7)
!55 = !DILocation(line: 57, column: 26, scope: !7)
!56 = !DILocation(line: 59, column: 20, scope: !7)
!57 = !DILocation(line: 60, column: 20, scope: !7)
!58 = !DILocation(line: 62, column: 35, scope: !7)
!59 = !DILocation(line: 62, column: 31, scope: !7)
!60 = !DILocation(line: 62, column: 25, scope: !7)
!61 = !DILocation(line: 62, column: 47, scope: !7)
!62 = !DILocation(line: 63, column: 4, scope: !7)
!63 = !DILocation(line: 64, column: 28, scope: !7)
!64 = !DILocation(line: 64, column: 40, scope: !7)
!65 = !DILocation(line: 65, column: 25, scope: !7)
!66 = !DILocation(line: 65, column: 48, scope: !7)
!67 = !DILocation(line: 66, column: 25, scope: !7)
!68 = !DILocation(line: 66, column: 37, scope: !7)
!69 = !DILocation(line: 66, column: 4, scope: !7)