; ModuleID = 'LLVMDialectModule'
source_filename = "LLVMDialectModule"

; Kernel entry point (tagged "kernel" with maxntidx = 256 in !nvvm.annotations).
;
; Each thread handles two consecutive elements: it reads one 32-bit word
; holding two 16-bit values from %0, converts each half with the PTX
; instruction cvt.f32.bf16, and writes the two 32-bit results to %1 at the same
; element index. Both memory accesses are predicated on the element index being
; below 12865792.
;
;   %0  base pointer (addrspace 1) indexed in 16-bit elements for the load
;   %1  base pointer (addrspace 1) indexed in 32-bit float elements for the store
;   %2  i32 argument that is not referenced anywhere in the body
;
; NOTE(review): the b32 load and the v2.b32 store presumably rely on %0 being
; 4-byte aligned and %1 being 8-byte aligned -- confirm against the launcher.
define void @triton__0d1d2de(ptr addrspace(1) %0, ptr addrspace(1) %1, i32 %2) local_unnamed_addr !dbg !5 {
  ; Per-thread offset inside the block: 2 * tid.x, masked to the even values 0..510.
  %tid = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !8
  %tid.x2 = shl i32 %tid, 1, !dbg !8
  %lane.off = and i32 %tid.x2, 510, !dbg !8

  ; Block base: ctaid.x * 512 (shift left by 9).
  %cta = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #1, !dbg !9
  %cta.base = shl i32 %cta, 9, !dbg !10

  ; %cta.base has its low 9 bits clear and %lane.off is at most 510, so this
  ; `or` acts as an addition of the two offsets.
  %idx = or i32 %cta.base, %lane.off, !dbg !11

  ; Predicate guarding both the load and the store (signed compare).
  %in.bounds = icmp slt i32 %idx, 12865792, !dbg !12

  ; Source address: %0 + idx, counted in 16-bit elements.
  %idx64 = sext i32 %idx to i64, !dbg !13
  %src = getelementptr i16, ptr addrspace(1) %0, i64 %idx64, !dbg !13

  ; Predicated 32-bit global load; the result register is pre-set to 0 and is
  ; left at 0 when the predicate is false.
  %packed = tail call i32 asm sideeffect "mov.u32 $0, 0x0;\0A\09@$2 ld.global.b32 { $0 }, [ $1 + 0 ];", "=r,l,b"(ptr addrspace(1) %src, i1 %in.bounds) #1, !dbg !14

  ; Split the loaded word: low 16 bits first, then the high 16 bits.
  %lo16 = trunc i32 %packed to i16, !dbg !14
  %hi.shift = lshr i32 %packed, 16, !dbg !14
  %hi16 = trunc i32 %hi.shift to i16, !dbg !14

  ; Convert each 16-bit half to f32 via PTX cvt.f32.bf16.
  %lo.f32 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %lo16) #1, !dbg !15
  %hi.f32 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %hi16) #1, !dbg !15

  ; Destination address: %1 + idx, counted in 32-bit float elements.
  %dst = getelementptr float, ptr addrspace(1) %1, i64 %idx64, !dbg !16

  ; The store asm takes "r" (32-bit integer) operands, so pass the raw bits.
  %lo.bits = bitcast float %lo.f32 to i32, !dbg !17
  %hi.bits = bitcast float %hi.f32 to i32, !dbg !17

  ; Predicated vectorized global store of both converted values.
  tail call void asm sideeffect "@$3 st.global.v2.b32 [ $2 + 0 ], { $0, $1 };", "r,r,l,b"(i32 %lo.bits, i32 %hi.bits, ptr addrspace(1) %dst, i1 %in.bounds) #1, !dbg !17
  ret void, !dbg !18
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0

attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #1 = { nounwind }

!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}
!nvvm.annotations = !{!3, !4, !4, !3}

!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!2 = !DIFile(filename: "cmxm2obucqff2z4vc55zcnscfuvur5s2b3e36dvgm57qobanlpho.py", directory: "/tmp/torchinductor_root/mx")
!3 = !{ptr @triton__0d1d2de, !"kernel", i32 1}
!4 = !{ptr @triton__0d1d2de, !"maxntidx", i32 256}
!5 = distinct !DISubprogram(name: "triton__0d1d2de", linkageName: "triton__0d1d2de", scope: !2, file: !2, line: 18, type: !6, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1)
!6 = !DISubroutineType(cc: DW_CC_normal, types: !7)
!7 = !{}
!8 = !DILocation(line: 21, column: 36, scope: !5)
!9 = !DILocation(line: 20, column: 28, scope: !5)
!10 = !DILocation(line: 20, column: 33, scope: !5)
!11 = !DILocation(line: 21, column: 23, scope: !5)
!12 = !DILocation(line: 22, column: 21, scope: !5)
!13 = !DILocation(line: 24, column: 30, scope: !5)
!14 = !DILocation(line: 24, column: 35, scope: !5)
!15 = !DILocation(line: 24, column: 45, scope: !5)
!16 = !DILocation(line: 26, column: 25, scope: !5)
!17 = !DILocation(line: 26, column: 36, scope: !5)
!18 = !DILocation(line: 26, column: 4, scope: !5)