; [web page header captured together with the file; not part of the LLVM module]
; 0-hero's picture
; Add files using upload-large-folder tool
; 9ab9a5e verified
; raw
; history blame
; 2.99 kB
; ModuleID = 'LLVMDialectModule'
; Module header. No target triple or datalayout is declared in the visible text.
source_filename = "LLVMDialectModule"
; Kernel @triton__0d1d2de
;   %0 : global (addrspace 1) pointer, read as two consecutive 32-bit words per thread
;   %1 : global (addrspace 1) pointer, written as one packed 32-bit word per thread
;   %2 : i32 that the body never reads (presumably an element count -- confirm with the producer)
;
; Each thread reinterprets the two loaded words as f32, converts each to bf16 with
; `cvt.rn.bf16.f32`, packs the two 16-bit results into a single i32 and stores it.
; Both the load and the store carry a constant-true predicate, so they are unconditional.
define void @triton__0d1d2de(ptr addrspace(1) %0, ptr addrspace(1) %1, i32 %2) local_unnamed_addr !dbg !5 {
  ; Offset inside the block: (tid.x * 2) & 510, i.e. an even value below 512.
  %tid = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !8
  %tid.x2 = shl i32 %tid, 1, !dbg !8
  %lane.off = and i32 %tid.x2, 510, !dbg !8

  ; Block base: ctaid.x * 512. Its low 9 bits are zero, so `or` combines it with the
  ; lane offset without any carry.
  %ctaid = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #1, !dbg !9
  %block.base = shl i32 %ctaid, 9, !dbg !10
  %elem.idx = or i32 %block.base, %lane.off, !dbg !11
  %elem.idx64 = sext i32 %elem.idx to i64, !dbg !12

  ; Source address, indexed in float-sized elements.
  %src.ptr = getelementptr float, ptr addrspace(1) %0, i64 %elem.idx64, !dbg !12

  ; The PTX zeroes both destination registers, then performs a predicated two-word
  ; vector load from global memory.
  %ld.pair = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.v2.b32 { $0, $1 }, [ $2 + 0 ];", "=r,=r,l,b"(ptr addrspace(1) %src.ptr, i1 true) #1, !dbg !13
  %ld.bits0 = extractvalue { i32, i32 } %ld.pair, 0, !dbg !13
  %ld.bits1 = extractvalue { i32, i32 } %ld.pair, 1, !dbg !13
  %f32.0 = bitcast i32 %ld.bits0 to float, !dbg !13
  %f32.1 = bitcast i32 %ld.bits1 to float, !dbg !13

  ; Destination address, indexed in 16-bit elements with the same element index.
  %dst.ptr = getelementptr i16, ptr addrspace(1) %1, i64 %elem.idx64, !dbg !14

  ; f32 -> bf16 conversion of each value (PTX `.rn` rounding modifier).
  %bf16.0 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %f32.0) #1, !dbg !15
  %bf16.1 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %f32.1) #1, !dbg !15

  ; Pack the two halves (element 0 first, element 1 second) into one 32-bit word.
  %pack.lo = insertelement <2 x i16> undef, i16 %bf16.0, i64 0, !dbg !15
  %pack.both = insertelement <2 x i16> %pack.lo, i16 %bf16.1, i64 1, !dbg !15
  %pack.bits = bitcast <2 x i16> %pack.both to i32, !dbg !15

  ; Predicated 32-bit store of the packed pair to global memory.
  tail call void asm sideeffect "@$2 st.global.b32 [ $1 + 0 ], { $0 };", "r,l,b"(i32 %pack.bits, ptr addrspace(1) %dst.ptr, i1 true) #1, !dbg !15
  ret void, !dbg !16
}
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
; NVVM intrinsic the kernel calls to obtain the thread index along x.
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
; Attribute group #0 is attached to the intrinsic declaration above.
attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; Attribute group #1 is attached to every inline-asm call site in @triton__0d1d2de.
attributes #1 = { nounwind }
; Module-level named metadata: module flags, debug compile unit, NVVM annotations.
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
; NOTE(review): !3 and !4 are each listed twice here; looks redundant -- confirm it is harmless for the consumer.
!nvvm.annotations = !{!3, !4, !4, !3}
!0 = !{i32 2, !"Debug Info Version", i32 3}
; Debug compile unit and the source file it refers to.
!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!2 = !DIFile(filename: "czjxjqxojsyyr4zmce6q6twysnucw6p4l5ujgp6ts2ecrm3ue3ex.py", directory: "/tmp/torchinductor_root/zj")
; NVVM annotations: !3 tags the function as a kernel entry point, !4 sets its "maxntidx" property to 256.
!3 = !{ptr @triton__0d1d2de, !"kernel", i32 1}
!4 = !{ptr @triton__0d1d2de, !"maxntidx", i32 256}
; Debug subprogram for the kernel; its subroutine type (!6) has an empty type list (!7).
!5 = distinct !DISubprogram(name: "triton__0d1d2de", linkageName: "triton__0d1d2de", scope: !2, file: !2, line: 18, type: !6, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1)
!6 = !DISubroutineType(cc: DW_CC_normal, types: !7)
!7 = !{}
; Source locations attached to the kernel's instructions via !dbg; all are scoped to !5,
; so line/column refer to the file named in !2.
!8 = !DILocation(line: 21, column: 36, scope: !5)
!9 = !DILocation(line: 20, column: 28, scope: !5)
!10 = !DILocation(line: 20, column: 33, scope: !5)
!11 = !DILocation(line: 21, column: 23, scope: !5)
!12 = !DILocation(line: 24, column: 30, scope: !5)
!13 = !DILocation(line: 24, column: 35, scope: !5)
!14 = !DILocation(line: 26, column: 25, scope: !5)
!15 = !DILocation(line: 26, column: 36, scope: !5)
!16 = !DILocation(line: 26, column: 4, scope: !5)