Build (aarch64)
Browse files- build/torch26-cxx11-cu126-aarch64-linux/flash_mla/__init__.py +33 -0
- build/torch26-cxx11-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so +3 -0
- build/torch26-cxx11-cu126-aarch64-linux/flash_mla/_ops.py +9 -0
- build/torch26-cxx98-cu126-aarch64-linux/flash_mla/__init__.py +33 -0
- build/torch26-cxx98-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so +3 -0
- build/torch26-cxx98-cu126-aarch64-linux/flash_mla/_ops.py +9 -0
- build/torch27-cxx11-cu126-aarch64-linux/flash_mla/__init__.py +33 -0
- build/torch27-cxx11-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so +3 -0
- build/torch27-cxx11-cu126-aarch64-linux/flash_mla/_ops.py +9 -0
- build/torch27-cxx11-cu128-aarch64-linux/flash_mla/__init__.py +33 -0
- build/torch27-cxx11-cu128-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so +3 -0
- build/torch27-cxx11-cu128-aarch64-linux/flash_mla/_ops.py +9 -0
build/torch26-cxx11-cu126-aarch64-linux/flash_mla/__init__.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
from ._ops import ops
|
4 |
+
|
5 |
+
|
6 |
+
def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
    """Compute tile-scheduler metadata for the FlashMLA decode kernel.

    Thin wrapper that forwards all arguments, in order, to the compiled
    extension op. Returns whatever the native op returns — presumably the
    (tile_scheduler_metadata, num_splits) pair consumed by
    mha_fwd_kvcache_mla; verify against the extension schema.
    """
    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
|
8 |
+
|
9 |
+
|
10 |
+
def mha_fwd_kvcache_mla(
    q: torch.Tensor,
    kcache: torch.Tensor,
    vcache_: torch.Tensor,
    head_size_v: int,
    seqlens_k: torch.Tensor,
    block_table: torch.Tensor,
    softmax_scale: float,
    is_causal_: bool,
    tile_scheduler_metadata: torch.Tensor,
    num_splits: torch.Tensor,
) -> torch.Tensor:
    """Run the FlashMLA forward attention kernel against a paged KV cache.

    Thin wrapper: every argument is forwarded positionally, unchanged, to
    the compiled extension op, and the op's output tensor is returned.

    NOTE(review): argument shapes/dtypes and exact semantics are defined by
    the native op's schema and cannot be confirmed from this file alone.
    """
    forwarded = (
        q,
        kcache,
        vcache_,
        head_size_v,
        seqlens_k,
        block_table,
        softmax_scale,
        is_causal_,
        tile_scheduler_metadata,
        num_splits,
    )
    return ops.mha_fwd_kvcache_mla(*forwarded)
|
build/torch26-cxx11-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1e97fef62f5ebbe6b19b0d5fbe700fcdf6b9acd7a54cba6f0b1d23665188fa9
|
3 |
+
size 2643848
|
build/torch26-cxx11-cu126-aarch64-linux/flash_mla/_ops.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from . import _flash_mla_341ab77
|
3 |
+
ops = torch.ops._flash_mla_341ab77
|
4 |
+
|
5 |
+
def add_op_namespace_prefix(op_name: str):
    """Return *op_name* qualified with this extension's torch op namespace."""
    namespace = "_flash_mla_341ab77"
    return f"{namespace}::{op_name}"
|
build/torch26-cxx98-cu126-aarch64-linux/flash_mla/__init__.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
from ._ops import ops
|
4 |
+
|
5 |
+
|
6 |
+
def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
    """Compute tile-scheduler metadata for the FlashMLA decode kernel.

    Thin wrapper that forwards all arguments, in order, to the compiled
    extension op. Returns whatever the native op returns — presumably the
    (tile_scheduler_metadata, num_splits) pair consumed by
    mha_fwd_kvcache_mla; verify against the extension schema.
    """
    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
|
8 |
+
|
9 |
+
|
10 |
+
def mha_fwd_kvcache_mla(
    q: torch.Tensor,
    kcache: torch.Tensor,
    vcache_: torch.Tensor,
    head_size_v: int,
    seqlens_k: torch.Tensor,
    block_table: torch.Tensor,
    softmax_scale: float,
    is_causal_: bool,
    tile_scheduler_metadata: torch.Tensor,
    num_splits: torch.Tensor,
) -> torch.Tensor:
    """Run the FlashMLA forward attention kernel against a paged KV cache.

    Thin wrapper: every argument is forwarded positionally, unchanged, to
    the compiled extension op, and the op's output tensor is returned.

    NOTE(review): argument shapes/dtypes and exact semantics are defined by
    the native op's schema and cannot be confirmed from this file alone.
    """
    forwarded = (
        q,
        kcache,
        vcache_,
        head_size_v,
        seqlens_k,
        block_table,
        softmax_scale,
        is_causal_,
        tile_scheduler_metadata,
        num_splits,
    )
    return ops.mha_fwd_kvcache_mla(*forwarded)
|
build/torch26-cxx98-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f15b3b0bd0bee56760bd6500175ca5a1fd17f2742ef9496c28ea3720d038c66
|
3 |
+
size 2640208
|
build/torch26-cxx98-cu126-aarch64-linux/flash_mla/_ops.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from . import _flash_mla_341ab77
|
3 |
+
ops = torch.ops._flash_mla_341ab77
|
4 |
+
|
5 |
+
def add_op_namespace_prefix(op_name: str):
    """Return *op_name* qualified with this extension's torch op namespace."""
    namespace = "_flash_mla_341ab77"
    return f"{namespace}::{op_name}"
|
build/torch27-cxx11-cu126-aarch64-linux/flash_mla/__init__.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
from ._ops import ops
|
4 |
+
|
5 |
+
|
6 |
+
def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
    """Compute tile-scheduler metadata for the FlashMLA decode kernel.

    Thin wrapper that forwards all arguments, in order, to the compiled
    extension op. Returns whatever the native op returns — presumably the
    (tile_scheduler_metadata, num_splits) pair consumed by
    mha_fwd_kvcache_mla; verify against the extension schema.
    """
    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
|
8 |
+
|
9 |
+
|
10 |
+
def mha_fwd_kvcache_mla(
    q: torch.Tensor,
    kcache: torch.Tensor,
    vcache_: torch.Tensor,
    head_size_v: int,
    seqlens_k: torch.Tensor,
    block_table: torch.Tensor,
    softmax_scale: float,
    is_causal_: bool,
    tile_scheduler_metadata: torch.Tensor,
    num_splits: torch.Tensor,
) -> torch.Tensor:
    """Run the FlashMLA forward attention kernel against a paged KV cache.

    Thin wrapper: every argument is forwarded positionally, unchanged, to
    the compiled extension op, and the op's output tensor is returned.

    NOTE(review): argument shapes/dtypes and exact semantics are defined by
    the native op's schema and cannot be confirmed from this file alone.
    """
    forwarded = (
        q,
        kcache,
        vcache_,
        head_size_v,
        seqlens_k,
        block_table,
        softmax_scale,
        is_causal_,
        tile_scheduler_metadata,
        num_splits,
    )
    return ops.mha_fwd_kvcache_mla(*forwarded)
|
build/torch27-cxx11-cu126-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb925b062d31034672a45d925a3767d953e97a3c6c483467e6b81833d42b5a27
|
3 |
+
size 2644048
|
build/torch27-cxx11-cu126-aarch64-linux/flash_mla/_ops.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from . import _flash_mla_341ab77
|
3 |
+
ops = torch.ops._flash_mla_341ab77
|
4 |
+
|
5 |
+
def add_op_namespace_prefix(op_name: str):
    """Return *op_name* qualified with this extension's torch op namespace."""
    namespace = "_flash_mla_341ab77"
    return f"{namespace}::{op_name}"
|
build/torch27-cxx11-cu128-aarch64-linux/flash_mla/__init__.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
from ._ops import ops
|
4 |
+
|
5 |
+
|
6 |
+
def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
    """Compute tile-scheduler metadata for the FlashMLA decode kernel.

    Thin wrapper that forwards all arguments, in order, to the compiled
    extension op. Returns whatever the native op returns — presumably the
    (tile_scheduler_metadata, num_splits) pair consumed by
    mha_fwd_kvcache_mla; verify against the extension schema.
    """
    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
|
8 |
+
|
9 |
+
|
10 |
+
def mha_fwd_kvcache_mla(
    q: torch.Tensor,
    kcache: torch.Tensor,
    vcache_: torch.Tensor,
    head_size_v: int,
    seqlens_k: torch.Tensor,
    block_table: torch.Tensor,
    softmax_scale: float,
    is_causal_: bool,
    tile_scheduler_metadata: torch.Tensor,
    num_splits: torch.Tensor,
) -> torch.Tensor:
    """Run the FlashMLA forward attention kernel against a paged KV cache.

    Thin wrapper: every argument is forwarded positionally, unchanged, to
    the compiled extension op, and the op's output tensor is returned.

    NOTE(review): argument shapes/dtypes and exact semantics are defined by
    the native op's schema and cannot be confirmed from this file alone.
    """
    forwarded = (
        q,
        kcache,
        vcache_,
        head_size_v,
        seqlens_k,
        block_table,
        softmax_scale,
        is_causal_,
        tile_scheduler_metadata,
        num_splits,
    )
    return ops.mha_fwd_kvcache_mla(*forwarded)
|
build/torch27-cxx11-cu128-aarch64-linux/flash_mla/_flash_mla_341ab77.abi3.so
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7776c629263bc0b32b82b8a094ead0749d6c393b6ca25c9ffa812bd8fbdb3002
|
3 |
+
size 2709472
|
build/torch27-cxx11-cu128-aarch64-linux/flash_mla/_ops.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from . import _flash_mla_341ab77
|
3 |
+
ops = torch.ops._flash_mla_341ab77
|
4 |
+
|
5 |
+
def add_op_namespace_prefix(op_name: str):
    """Return *op_name* qualified with this extension's torch op namespace."""
    namespace = "_flash_mla_341ab77"
    return f"{namespace}::{op_name}"
|