// | |
// Generated by LLVM NVPTX Back-End | |
// | |
.version 8.2
.target sm_89
.address_size 64

	// .globl	triton__0d1d2de

// -----------------------------------------------------------------------------
// Kernel: triton__0d1d2de
//
// Purpose (as established by the instructions below):
//   Each thread handles two consecutive elements. It loads one 32-bit word
//   (two packed 16-bit values) from the buffer at param_0, converts each half
//   with cvt.f32.bf16, and stores the two 32-bit results to the buffer at
//   param_1.
//
// Parameters:
//   param_0 (.u64)  base address of the source buffer (2 bytes per element)
//   param_1 (.u64)  base address of the destination buffer (4 bytes per element)
//   param_2 (.u32)  declared but never read by this kernel
//
// Indexing:
//   idx = (%ctaid.x << 9) | ((%tid.x << 1) & 510)
//   i.e. 512 elements per CTA, 2 per thread (launch bound: 256 threads).
//   All memory accesses are predicated on idx < 12865792.
//
// NOTE(review): this text was rebuilt from a copy in which the leading
// directive keywords (.version/.target/.address_size/.param/.reg/.maxntid/
// .loc/.file) and the ld/st state-space modifier had been stripped. The
// `.visible` linkage and the `.global` state space are presumed from the
// usual NVPTX/Triton output -- confirm against a freshly generated file.
// -----------------------------------------------------------------------------
.visible .entry triton__0d1d2de(
	.param .u64 triton__0d1d2de_param_0,
	.param .u64 triton__0d1d2de_param_1,
	.param .u32 triton__0d1d2de_param_2
)
.maxntid 256, 1, 1
{
	.reg .pred 	%p<3>;
	.reg .b16 	%rs<3>;
	.reg .b32 	%r<12>;
	.reg .b64 	%rd<7>;
	.loc	1 18 0
$L__func_begin0:
	.loc	1 18 0

	// Fetch the two buffer base addresses; param_2 is intentionally not loaded.
	ld.param.u64 	%rd3, [triton__0d1d2de_param_0];
	ld.param.u64 	%rd4, [triton__0d1d2de_param_1];
$L__tmp0:
	.loc	1 21 36
	// Per-thread offset inside the CTA's 512-element tile: (tid.x * 2) & 510.
	mov.u32 	%r7, %tid.x;
	shl.b32 	%r8, %r7, 1;
	and.b32  	%r9, %r8, 510;
	.loc	1 20 28
	mov.u32 	%r1, %ctaid.x;
	.loc	1 20 33
	// CTA base offset: ctaid.x * 512.
	shl.b32 	%r10, %r1, 9;
	.loc	1 21 23
	// The low 9 bits of %r10 are zero, so OR combines base and offset.
	or.b32  	%r11, %r10, %r9;
	.loc	1 22 21
	// %p1 guards both the load and the store below.
	setp.lt.s32 	%p1, %r11, 12865792;
	.loc	1 24 30
	// Source address = param_0 + idx * 2 bytes.
	mul.wide.s32 	%rd5, %r11, 2;
	add.s64 	%rd1, %rd3, %rd5;
	.loc	1 24 35
	// Pre-zero the destination so %r2 is defined when the predicate is false.
	mov.u32 %r2, 0x0;
	@%p1 ld.global.b32 { %r2 }, [ %rd1 + 0 ];
	// Split the 32-bit word: %rs1 = low 16 bits, %rs2 = high 16 bits.
	cvt.u16.u32 	%rs1, %r2;
	{ .reg .b16 tmp; mov.b32 {tmp, %rs2}, %r2; }
	.loc	1 24 45
	cvt.f32.bf16 %r5, %rs1;
	cvt.f32.bf16 %r6, %rs2;
	.loc	1 26 25
	// Destination address = param_1 + idx * 4 bytes.
	mul.wide.s32 	%rd6, %r11, 4;
	add.s64 	%rd2, %rd4, %rd6;
	.loc	1 26 36
	// One vectorised store writes both converted values.
	@%p1 st.global.v2.b32 [ %rd2 + 0 ], { %r5, %r6 };
	.loc	1 26 4
	ret;
$L__tmp1:
$L__func_end0:

}
	// Source file index 1, referenced by every .loc directive above.
	// NOTE(review): the DWARF .debug_* sections that follow in this file
	// appear to have had their .section/.b8/.b32/.b64 keywords stripped as
	// well; they are data and were not reconstructed here.
	.file	1 "/tmp/torchinductor_root/mx/cmxm2obucqff2z4vc55zcnscfuvur5s2b3e36dvgm57qobanlpho.py"
.debug_abbrev | |
{ | |
1 | |
17 | |
1 | |
37 | |
8 | |
19 | |
5 | |
3 | |
8 | |
16 | |
6 | |
27 | |
8 | |
180 | |
66 | |
12 | |
17 | |
1 | |
18 | |
1 | |
0 | |
0 | |
2 | |
46 | |
0 | |
17 | |
1 | |
18 | |
1 | |
64 | |
10 | |
135 | |
64 | |
8 | |
3 | |
8 | |
58 | |
11 | |
59 | |
11 | |
63 | |
12 | |
0 | |
0 | |
0 | |
} | |
.debug_info | |
{ | |
176 | |
2 | |
0 | |
.debug_abbrev | |
8 | |
1 | |
116 | |
114 | |
105 | |
116 | |
111 | |
110 | |
0 | |
2 | |
0 | |
99 | |
109 | |
120 | |
109 | |
50 | |
111 | |
98 | |
117 | |
99 | |
113 | |
102 | |
102 | |
50 | |
122 | |
52 | |
118 | |
99 | |
53 | |
53 | |
122 | |
99 | |
110 | |
115 | |
99 | |
102 | |
117 | |
118 | |
117 | |
114 | |
53 | |
115 | |
50 | |
98 | |
51 | |
101 | |
51 | |
54 | |
100 | |
118 | |
103 | |
109 | |
53 | |
55 | |
113 | |
111 | |
98 | |
97 | |
110 | |
108 | |
112 | |
104 | |
111 | |
46 | |
112 | |
121 | |
0 | |
.debug_line | |
47 | |
116 | |
109 | |
112 | |
47 | |
116 | |
111 | |
114 | |
99 | |
104 | |
105 | |
110 | |
100 | |
117 | |
99 | |
116 | |
111 | |
114 | |
95 | |
114 | |
111 | |
111 | |
116 | |
47 | |
109 | |
120 | |
0 | |
1 | |
$L__func_begin0 | |
$L__func_end0 | |
2 | |
$L__func_begin0 | |
$L__func_end0 | |
1 | |
156 | |
116 | |
114 | |
105 | |
116 | |
111 | |
110 | |
95 | |
95 | |
48 | |
100 | |
49 | |
100 | |
50 | |
100 | |
101 | |
0 | |
116 | |
114 | |
105 | |
116 | |
111 | |
110 | |
95 | |
95 | |
48 | |
100 | |
49 | |
100 | |
50 | |
100 | |
101 | |
0 | |
1 | |
18 | |
1 | |
0 | |
} | |
.debug_pubnames | |
{ | |
$L__pubNames_end0-$L__pubNames_start0 | |
$L__pubNames_start0: | |
2 | |
0 | |
.debug_info | |
180 | |
125 | |
116 | |
114 | |
105 | |
116 | |
111 | |
110 | |
95 | |
95 | |
48 | |
100 | |
49 | |
100 | |
50 | |
100 | |
101 | |
0 | |
0 | |
$L__pubNames_end0: | |
} | |
.debug_pubtypes | |
{ | |
$L__pubTypes_end0-$L__pubTypes_start0 | |
$L__pubTypes_start0: | |
2 | |
0 | |
.debug_info | |
180 | |
0 | |
$L__pubTypes_end0: | |
} | |
.debug_loc { } | |