0-hero's picture
Add files using upload-large-folder tool
9ab9a5e verified
raw
history blame
18.9 kB
; ModuleID = 'LLVMDialectModule'
source_filename = "LLVMDialectModule"
; Externally defined, zero-length byte array in address space 3. The kernel
; below accesses it with `st.shared` / `ld.shared` inline PTX and plain loads,
; using float slots 0 and 1 as scratch for its two cross-warp reductions.
@global_smem = external addrspace(3) global [0 x i8]
; Kernel @triton__0d1d2d3d4d5d6d7de8de (machine-generated IR).
;
; Work decomposition, as established by the index arithmetic below:
;   b   = ctaid.x                        (one 256-element slice per block)
;   col = (tid.x * 4) & 252              (4 consecutive elements per thread)
;   i   = b * 256 + col                  (flat element index, sign-extended to i64)
;
; Arguments (all in address space 1):
;   %0  float buffer, read at [i .. i+3] and then overwritten in place
;   %1  16-bit elements read at [i .. i+3] and converted with cvt.f32.bf16
;   %2  float buffer indexed by col only (same values for every block)
;   %3  float buffer read at [i .. i+3]
;   %4  float buffer, one scalar per block (index b)
;   %5  float buffer, one scalar per block (index b)
;   %6  16-bit output buffer, written at [i .. i+3] via cvt.rn.bf16.f32
;   %7, %8  i32 values that are never referenced in this body
;
; Per-element computation (k = 0..3):
;   g_k = f32(%1[i+k]) * %2[col+k]
;   n_k = (%3[i+k] - %4[b]) * %5[b]
;   S1  = block-wide sum of g,  S2 = block-wide sum of g * n
;   r_k = (%5[b] / 256) * (256 * g_k - S1 - n_k * S2) + %0[i+k]
;   %0[i+k] = r_k ;  %6[i+k] = bf16(r_k)
;
; NOTE(review): this arithmetic resembles a layer-norm backward pass fused with
; a residual add over a hidden size of 256 -- confirm against the generating
; Python source named in the debug metadata.
; NOTE(review): the cross-warp reduction combines exactly two partial sums
; (shared slots 0 and 1), so it assumes a 64-thread block; this matches the
; "maxntidx" = 64 NVVM annotation on this function.
;
; Every load/store predicate operand is the constant `i1 true`, so the guarded
; PTX instructions always execute and the `@!p mov` fallbacks never fire.
define void @triton__0d1d2d3d4d5d6d7de8de(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, ptr addrspace(1) %5, ptr addrspace(1) %6, i32 %7, i32 %8) local_unnamed_addr !dbg !5 {
; --- Thread / block indexing -------------------------------------------------
; %11 = tid.x & 31, %13 = (tid.x >> 5) & 1, %14 = col = (tid.x << 2) & 252.
%10 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !8
%11 = and i32 %10, 31, !dbg !8
%12 = lshr i32 %10, 5, !dbg !8
%13 = and i32 %12, 1, !dbg !8
%urem = shl i32 %10, 2, !dbg !8
%14 = and i32 %urem, 252, !dbg !8
; %15 = b = ctaid.x; %17 = i = (b << 8) | col; %18 = i widened to i64.
%15 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #3, !dbg !9
%16 = shl i32 %15, 8, !dbg !10
%17 = or i32 %16, %14, !dbg !11
%18 = sext i32 %17 to i64, !dbg !12
; --- Load 4 x 16-bit values from %1[i..i+3] as two 32-bit words ---------------
%19 = getelementptr i16, ptr addrspace(1) %1, i64 %18, !dbg !12
%20 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %19, i1 true, i32 0, i1 true, i32 0, i1 true) #3, !dbg !13
%21 = extractvalue { i32, i32 } %20, 0, !dbg !13
%22 = extractvalue { i32, i32 } %20, 1, !dbg !13
; Split each 32-bit word into its low and high 16-bit halves.
%23 = trunc i32 %21 to i16, !dbg !13
%extelt.offset = lshr i32 %21, 16, !dbg !13
%24 = trunc i32 %extelt.offset to i16, !dbg !13
%25 = trunc i32 %22 to i16, !dbg !13
%extelt.offset1 = lshr i32 %22, 16, !dbg !13
%26 = trunc i32 %extelt.offset1 to i16, !dbg !13
; Widen the four bf16 halves to f32.
%27 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %23) #3, !dbg !14
%28 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %24) #3, !dbg !14
%29 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %25) #3, !dbg !14
%30 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %26) #3, !dbg !14
; --- Load 4 floats from %2[col..col+3] (indexed by col only, evict_last hint) -
%31 = zext nneg i32 %14 to i64, !dbg !15
%32 = getelementptr float, ptr addrspace(1) %2, i64 %31, !dbg !15
%33 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_last.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %32, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #3, !dbg !16
%34 = extractvalue { i32, i32, i32, i32 } %33, 0, !dbg !16
%35 = extractvalue { i32, i32, i32, i32 } %33, 1, !dbg !16
%36 = extractvalue { i32, i32, i32, i32 } %33, 2, !dbg !16
%37 = extractvalue { i32, i32, i32, i32 } %33, 3, !dbg !16
%38 = bitcast i32 %34 to float, !dbg !16
%39 = bitcast i32 %35 to float, !dbg !16
%40 = bitcast i32 %36 to float, !dbg !16
%41 = bitcast i32 %37 to float, !dbg !16
; --- Load 4 floats from %3[i..i+3] --------------------------------------------
%42 = getelementptr float, ptr addrspace(1) %3, i64 %18, !dbg !17
%43 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %42, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #3, !dbg !18
%44 = extractvalue { i32, i32, i32, i32 } %43, 0, !dbg !18
%45 = extractvalue { i32, i32, i32, i32 } %43, 1, !dbg !18
%46 = extractvalue { i32, i32, i32, i32 } %43, 2, !dbg !18
%47 = extractvalue { i32, i32, i32, i32 } %43, 3, !dbg !18
%48 = bitcast i32 %44 to float, !dbg !18
%49 = bitcast i32 %45 to float, !dbg !18
%50 = bitcast i32 %46 to float, !dbg !18
%51 = bitcast i32 %47 to float, !dbg !18
; --- Per-block scalar %4[b] ---------------------------------------------------
; The same address is loaded four times; only the first result (%55) is used
; later. %56-%58 are kept because the asm is marked sideeffect.
%52 = sext i32 %15 to i64, !dbg !19
%53 = getelementptr float, ptr addrspace(1) %4, i64 %52, !dbg !19
%54 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %53, i1 true) #3, !dbg !20
%55 = bitcast i32 %54 to float, !dbg !20
%56 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %53, i1 true) #3, !dbg !20
%57 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %53, i1 true) #3, !dbg !20
%58 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %53, i1 true) #3, !dbg !20
; --- Per-block scalar %5[b] ---------------------------------------------------
; Also loaded four times from one address (%61, %63, %65, %67). %61 feeds the
; multiplies below; all four feed the divisions by 256 further down.
%59 = getelementptr float, ptr addrspace(1) %5, i64 %52, !dbg !21
%60 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %59, i1 true) #3, !dbg !22
%61 = bitcast i32 %60 to float, !dbg !22
%62 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %59, i1 true) #3, !dbg !22
%63 = bitcast i32 %62 to float, !dbg !22
%64 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %59, i1 true) #3, !dbg !22
%65 = bitcast i32 %64 to float, !dbg !22
%66 = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.L1::evict_last.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %59, i1 true) #3, !dbg !22
%67 = bitcast i32 %66 to float, !dbg !22
; --- Load 4 floats from %0[i..i+3]; %68 is reused as the store address later --
%68 = getelementptr float, ptr addrspace(1) %0, i64 %18, !dbg !23
%69 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %68, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #3, !dbg !24
%70 = extractvalue { i32, i32, i32, i32 } %69, 0, !dbg !24
%71 = extractvalue { i32, i32, i32, i32 } %69, 1, !dbg !24
%72 = extractvalue { i32, i32, i32, i32 } %69, 2, !dbg !24
%73 = extractvalue { i32, i32, i32, i32 } %69, 3, !dbg !24
%74 = bitcast i32 %70 to float, !dbg !24
%75 = bitcast i32 %71 to float, !dbg !24
%76 = bitcast i32 %72 to float, !dbg !24
%77 = bitcast i32 %73 to float, !dbg !24
; --- g_k = f32(%1 value) * %2 value -------------------------------------------
%78 = fmul float %27, %38, !dbg !25
%79 = fmul float %28, %39, !dbg !25
%80 = fmul float %29, %40, !dbg !25
%81 = fmul float %30, %41, !dbg !25
; --- Reduction 1: S1 = sum of g ----------------------------------------------
; Per-thread partial sum of the four g values.
%82 = fadd float %78, %79, !dbg !26
%83 = fadd float %80, %82, !dbg !26
%84 = fadd float %81, %83, !dbg !26
; Butterfly shuffles with lane offsets 16, 8, 4, 2, 1 (full member mask -1)
; accumulate the partial sums across the 32 lanes of the warp.
%85 = bitcast float %84 to i32, !dbg !32
%86 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %85, i32 16, i32 31), !dbg !32
%87 = bitcast i32 %86 to float, !dbg !32
%88 = fadd float %84, %87, !dbg !26
%89 = bitcast float %88 to i32, !dbg !32
%90 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %89, i32 8, i32 31), !dbg !32
%91 = bitcast i32 %90 to float, !dbg !32
%92 = fadd float %88, %91, !dbg !26
%93 = bitcast float %92 to i32, !dbg !32
%94 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %93, i32 4, i32 31), !dbg !32
%95 = bitcast i32 %94 to float, !dbg !32
%96 = fadd float %92, %95, !dbg !26
%97 = bitcast float %96 to i32, !dbg !32
%98 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %97, i32 2, i32 31), !dbg !32
%99 = bitcast i32 %98 to float, !dbg !32
%100 = fadd float %96, %99, !dbg !26
%101 = bitcast float %100 to i32, !dbg !32
%102 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %101, i32 1, i32 31), !dbg !32
%103 = bitcast i32 %102 to float, !dbg !32
%104 = fadd float %100, %103, !dbg !26
; Threads with (tid.x & 31) == 0 store their warp total to shared float slot
; %13 (0 or 1); %105 and %107 are reused by the second reduction.
%105 = icmp eq i32 %11, 0, !dbg !32
%106 = zext nneg i32 %13 to i64, !dbg !32
%107 = getelementptr float, ptr addrspace(3) @global_smem, i64 %106, !dbg !32
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %107, float %104, i1 %105) #3, !dbg !32
tail call void @llvm.nvvm.barrier0(), !dbg !32
; Threads with tid.x < 2 read shared slot tid.x; for all other threads the
; predicated load leaves %111 unwritten, and the value derived from it is never
; stored (the store below is predicated on %118).
%108 = icmp slt i32 %10, 2, !dbg !32
%109 = sext i32 %10 to i64, !dbg !32
%110 = getelementptr float, ptr addrspace(3) @global_smem, i64 %109, !dbg !32
%111 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %110, i1 %108) #3, !dbg !32
; One more butterfly step (offset 1) adds slot 0 and slot 1 together.
%112 = bitcast float %111 to i32, !dbg !32
%113 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %112, i32 1, i32 31), !dbg !32
%114 = bitcast i32 %113 to float, !dbg !32
%115 = fadd float %111, %114, !dbg !26
; %118 = (tid.x < 2) && (tid.x even): the final total is written to the slot
; addressed by %110 only under that predicate.
%116 = and i32 %10, 1, !dbg !32
%117 = icmp eq i32 %116, 0, !dbg !32
%118 = and i1 %108, %117, !dbg !32
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %110, float %115, i1 %118) #3, !dbg !32
tail call void @llvm.nvvm.barrier0(), !dbg !32
; Every thread reads the block-wide total from shared slot 0; %120 = S1 + 0.0.
%119 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !32
%120 = fadd float %119, 0.000000e+00, !dbg !34
; --- n_k = (%3 value - %4[b]) * %5[b] -----------------------------------------
%121 = fsub float %48, %55, !dbg !38
%122 = fsub float %49, %55, !dbg !38
%123 = fsub float %50, %55, !dbg !38
%124 = fsub float %51, %55, !dbg !38
%125 = fmul float %121, %61, !dbg !39
%126 = fmul float %122, %61, !dbg !39
%127 = fmul float %123, %61, !dbg !39
%128 = fmul float %124, %61, !dbg !39
; g_k * n_k, the addends of the second reduction.
%129 = fmul float %78, %125, !dbg !40
%130 = fmul float %79, %126, !dbg !40
%131 = fmul float %80, %127, !dbg !40
%132 = fmul float %81, %128, !dbg !40
; Barrier before the shared scratch is written again -- presumably so that all
; threads have finished reading slot 0 (%119) first.
tail call void @llvm.nvvm.barrier0(), !dbg !41
; --- Reduction 2: S2 = sum of g * n (same structure as reduction 1) -----------
%133 = fadd float %129, %130, !dbg !43
%134 = fadd float %131, %133, !dbg !43
%135 = fadd float %132, %134, !dbg !43
%136 = bitcast float %135 to i32, !dbg !41
%137 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %136, i32 16, i32 31), !dbg !41
%138 = bitcast i32 %137 to float, !dbg !41
%139 = fadd float %135, %138, !dbg !43
%140 = bitcast float %139 to i32, !dbg !41
%141 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %140, i32 8, i32 31), !dbg !41
%142 = bitcast i32 %141 to float, !dbg !41
%143 = fadd float %139, %142, !dbg !43
%144 = bitcast float %143 to i32, !dbg !41
%145 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %144, i32 4, i32 31), !dbg !41
%146 = bitcast i32 %145 to float, !dbg !41
%147 = fadd float %143, %146, !dbg !43
%148 = bitcast float %147 to i32, !dbg !41
%149 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %148, i32 2, i32 31), !dbg !41
%150 = bitcast i32 %149 to float, !dbg !41
%151 = fadd float %147, %150, !dbg !43
%152 = bitcast float %151 to i32, !dbg !41
%153 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %152, i32 1, i32 31), !dbg !41
%154 = bitcast i32 %153 to float, !dbg !41
%155 = fadd float %151, %154, !dbg !43
; Warp totals to shared slots 0/1, combine them, publish the total in slot 0.
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %107, float %155, i1 %105) #3, !dbg !41
tail call void @llvm.nvvm.barrier0(), !dbg !41
%156 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %110, i1 %108) #3, !dbg !41
%157 = bitcast float %156 to i32, !dbg !41
%158 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %157, i32 1, i32 31), !dbg !41
%159 = bitcast i32 %158 to float, !dbg !41
%160 = fadd float %156, %159, !dbg !43
tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %110, float %160, i1 %118) #3, !dbg !41
tail call void @llvm.nvvm.barrier0(), !dbg !41
; %162 = S2 + 0.0, read by every thread from shared slot 0.
%161 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !41
%162 = fadd float %161, 0.000000e+00, !dbg !46
; --- Final combination --------------------------------------------------------
; %5[b] / 256 via div.full.f32. Only %163 is consumed below; %164-%166 divide
; the redundant copies of the same scalar and their results are unused.
%163 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %61, float 2.560000e+02) #3, !dbg !48
%164 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %63, float 2.560000e+02) #3, !dbg !48
%165 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %65, float 2.560000e+02) #3, !dbg !48
%166 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %67, float 2.560000e+02) #3, !dbg !48
; 256 * g_k
%167 = fmul float %78, 2.560000e+02, !dbg !49
%168 = fmul float %79, 2.560000e+02, !dbg !49
%169 = fmul float %80, 2.560000e+02, !dbg !49
%170 = fmul float %81, 2.560000e+02, !dbg !49
; 256 * g_k - S1
%171 = fsub float %167, %120, !dbg !50
%172 = fsub float %168, %120, !dbg !50
%173 = fsub float %169, %120, !dbg !50
%174 = fsub float %170, %120, !dbg !50
; n_k * S2
%175 = fmul float %125, %162, !dbg !51
%176 = fmul float %126, %162, !dbg !51
%177 = fmul float %127, %162, !dbg !51
%178 = fmul float %128, %162, !dbg !51
; 256 * g_k - S1 - n_k * S2
%179 = fsub float %171, %175, !dbg !52
%180 = fsub float %172, %176, !dbg !52
%181 = fsub float %173, %177, !dbg !52
%182 = fsub float %174, %178, !dbg !52
; Scale by %5[b] / 256.
%183 = fmul float %163, %179, !dbg !53
%184 = fmul float %163, %180, !dbg !53
%185 = fmul float %163, %181, !dbg !53
%186 = fmul float %163, %182, !dbg !53
; Add the values previously loaded from %0[i..i+3].
%187 = fadd float %183, %74, !dbg !54
%188 = fadd float %184, %75, !dbg !54
%189 = fadd float %185, %76, !dbg !54
%190 = fadd float %186, %77, !dbg !54
; --- Store the f32 results back to %0[i..i+3] (same address as the load) ------
%191 = bitcast float %187 to i32, !dbg !55
%192 = bitcast float %188 to i32, !dbg !55
%193 = bitcast float %189 to i32, !dbg !55
%194 = bitcast float %190 to i32, !dbg !55
tail call void asm sideeffect "@$5 st.global.v4.b32 [ $4 + 0 ], { $0, $1, $2, $3 };", "r,r,r,r,l,b"(i32 %191, i32 %192, i32 %193, i32 %194, ptr addrspace(1) %68, i1 true) #3, !dbg !55
; --- Round to bf16, pack pairs into 32-bit words, store to %6[i..i+3] ---------
%195 = getelementptr i16, ptr addrspace(1) %6, i64 %18, !dbg !56
%196 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %187) #3, !dbg !57
%197 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %188) #3, !dbg !57
%198 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %189) #3, !dbg !57
%199 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %190) #3, !dbg !57
; Element 0 of each <2 x i16> is the first of the pair, element 1 the second.
%200 = insertelement <2 x i16> undef, i16 %196, i64 0, !dbg !57
%201 = insertelement <2 x i16> %200, i16 %197, i64 1, !dbg !57
%202 = bitcast <2 x i16> %201 to i32, !dbg !57
%203 = insertelement <2 x i16> undef, i16 %198, i64 0, !dbg !57
%204 = insertelement <2 x i16> %203, i16 %199, i64 1, !dbg !57
%205 = bitcast <2 x i16> %204 to i32, !dbg !57
tail call void asm sideeffect "@$3 st.global.v2.b32 [ $2 + 0 ], { $0, $1 };", "r,r,l,b"(i32 %202, i32 %205, ptr addrspace(1) %195, i1 true) #3, !dbg !57
ret void, !dbg !58
}
; Intrinsic declarations and attribute groups used by the kernel above.
;
; Reads the thread index in x; called once at the top of the kernel.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
; Butterfly shuffle. The kernel always passes member mask -1, the value to
; exchange, a lane offset (16/8/4/2/1) and the constant 31 as the last operand.
; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #1
; Barrier; the kernel calls it between its shared-memory stores and the loads
; that consume them.
; Function Attrs: convergent nocallback nounwind
declare void @llvm.nvvm.barrier0() #2
; #0-#2 are attached to the three declarations above; #3 is attached to every
; inline-asm call site in the kernel.
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
attributes #2 = { convergent nocallback nounwind }
attributes #3 = { nounwind }
; Module-level metadata: debug-info version flag, the debug compile unit, and
; the NVVM annotations for the kernel.
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
; The annotation list names !3 and !4 twice each; the two distinct entries are
; defined below.
!nvvm.annotations = !{!3, !4, !4, !3}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!2 = !DIFile(filename: "csned4hyxpgwu5ttubs3r7uxkjq5yfl3zh6c2sozobtkek2uzfcv.py", directory: "/tmp/torchinductor_root/sn")
; !3 tags the function with "kernel" = 1; !4 tags it with "maxntidx" = 64
; (see the NVVM IR specification for the meaning of these annotation keys).
!3 = !{ptr @triton__0d1d2d3d4d5d6d7de8de, !"kernel", i32 1}
!4 = !{ptr @triton__0d1d2d3d4d5d6d7de8de, !"maxntidx", i32 64}
; Debug subprogram for the kernel; its subroutine type carries an empty type list.
!5 = distinct !DISubprogram(name: "triton__0d1d2d3d4d5d6d7de8de", linkageName: "triton__0d1d2d3d4d5d6d7de8de", scope: !2, file: !2, line: 18, type: !6, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1)
!6 = !DISubroutineType(cc: DW_CC_normal, types: !7)
!7 = !{}
!8 = !DILocation(line: 26, column: 26, scope: !5)
!9 = !DILocation(line: 23, column: 28, scope: !5)
!10 = !DILocation(line: 30, column: 40, scope: !5)
!11 = !DILocation(line: 30, column: 36, scope: !5)
!12 = !DILocation(line: 30, column: 30, scope: !5)
!13 = !DILocation(line: 30, column: 46, scope: !5)
!14 = !DILocation(line: 30, column: 67, scope: !5)
!15 = !DILocation(line: 31, column: 30, scope: !5)
!16 = !DILocation(line: 31, column: 35, scope: !5)
!17 = !DILocation(line: 32, column: 30, scope: !5)
!18 = !DILocation(line: 32, column: 46, scope: !5)
!19 = !DILocation(line: 33, column: 30, scope: !5)
!20 = !DILocation(line: 33, column: 35, scope: !5)
!21 = !DILocation(line: 34, column: 31, scope: !5)
!22 = !DILocation(line: 34, column: 36, scope: !5)
!23 = !DILocation(line: 35, column: 35, scope: !5)
!24 = !DILocation(line: 35, column: 51, scope: !5)
!25 = !DILocation(line: 37, column: 18, scope: !5)
!26 = !DILocation(line: 233, column: 15, scope: !27, inlinedAt: !30)
!27 = distinct !DILexicalBlockFile(scope: !29, file: !28, discriminator: 0)
!28 = !DIFile(filename: "standard.py", directory: "/usr/local/lib/python3.10/dist-packages/triton/language")
!29 = distinct !DILexicalBlockFile(scope: !5, file: !28, discriminator: 0)
!30 = !DILocation(line: 243, column: 36, scope: !27, inlinedAt: !31)
!31 = !DILocation(line: 40, column: 57, scope: !27)
!32 = !DILocation(line: 243, column: 36, scope: !29, inlinedAt: !33)
!33 = !DILocation(line: 40, column: 57, scope: !29)
!34 = !DILocation(line: 8, column: 15, scope: !35, inlinedAt: !37)
!35 = distinct !DILexicalBlockFile(scope: !5, file: !36, discriminator: 0)
!36 = !DIFile(filename: "triton_helpers.py", directory: "/usr/local/lib/python3.10/dist-packages/torch/_inductor")
!37 = !DILocation(line: 40, column: 44, scope: !35)
!38 = !DILocation(line: 41, column: 19, scope: !5)
!39 = !DILocation(line: 42, column: 20, scope: !5)
!40 = !DILocation(line: 43, column: 19, scope: !5)
!41 = !DILocation(line: 243, column: 36, scope: !29, inlinedAt: !42)
!42 = !DILocation(line: 46, column: 59, scope: !29)
!43 = !DILocation(line: 233, column: 15, scope: !27, inlinedAt: !44)
!44 = !DILocation(line: 243, column: 36, scope: !27, inlinedAt: !45)
!45 = !DILocation(line: 46, column: 59, scope: !27)
!46 = !DILocation(line: 8, column: 15, scope: !35, inlinedAt: !47)
!47 = !DILocation(line: 46, column: 45, scope: !35)
!48 = !DILocation(line: 48, column: 20, scope: !5)
!49 = !DILocation(line: 49, column: 19, scope: !5)
!50 = !DILocation(line: 50, column: 20, scope: !5)
!51 = !DILocation(line: 51, column: 20, scope: !5)
!52 = !DILocation(line: 52, column: 20, scope: !5)
!53 = !DILocation(line: 53, column: 20, scope: !5)
!54 = !DILocation(line: 54, column: 20, scope: !5)
!55 = !DILocation(line: 56, column: 51, scope: !5)
!56 = !DILocation(line: 57, column: 25, scope: !5)
!57 = !DILocation(line: 57, column: 48, scope: !5)
!58 = !DILocation(line: 57, column: 4, scope: !5)