# kernel
# moe / build.toml
# danieldk's picture
# danieldk HF Staff
# Sync with vLLM and add `Llama4TextMoe` layer
# 01fbc17
# Package-wide settings. `name` is the identifier of this kernel package.
[general]
name = "moe"
# Torch extension: the binding sources plus the scalar-type header that is
# also listed under [kernel.moe-marlin].
[torch]
# Header search path is the directory containing this file.
include = ["."]
# NOTE(review): presumably the file extensions of non-compiled files that are
# shipped with the Python package -- confirm against the builder's docs.
pyext = ["py", "json"]
src = [
  "core/scalar_type.hpp",
  "torch-ext/torch_binding.cpp",
  "torch-ext/torch_binding.h",
]
# FP8 helper kernel. The source list mixes the shared compatibility/dispatch
# headers with the fp8/ sources, including the headers under fp8/amd/.
[kernel.fp8]
depends = ["torch"]
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
include = ["."]
src = [
  "cuda_compat.h",
  "dispatch_utils.h",
  "fp8/amd/hip_float8.h",
  "fp8/amd/hip_float8_impl.h",
  "fp8/common.cu",
  "fp8/common.cuh",
  "fp8/vectorization.cuh",
]
# Core MoE kernels: alignment/sum, WNA16, and top-k softmax sources.
# NOTE(review): unlike [kernel.fp8] and [kernel.moe-marlin], this table sets
# no `include` key -- confirm that is intentional.
[kernel.moe]
depends = ["torch"]
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
src = [
  "cuda_compat.h",
  "dispatch_utils.h",
  "moe/moe_align_sum_kernels.cu",
  "moe/moe_wna16.cu",
  "moe/moe_wna16_utils.h",
  "moe/topk_softmax_kernels.cu",
]
# Marlin MoE kernels. The capability list starts at 8.0, whereas the other
# kernels in this file also list 7.x targets.
[kernel.moe-marlin]
depends = ["torch"]
cuda-capabilities = ["8.0", "8.6", "8.7", "8.9", "9.0"]
include = ["."]
src = [
  "core/exception.hpp",
  "core/scalar_type.hpp",
  "marlin-moe/marlin_moe_ops.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.h",
]
# Activation kernels.
# NOTE(review): the helper headers are referenced under activation/, whereas
# [kernel.fp8] and [kernel.moe] list top-level cuda_compat.h and
# dispatch_utils.h -- confirm both sets of paths exist.
[kernel.activation]
depends = ["torch"]
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
src = [
  "activation/activation_kernels.cu",
  "activation/cuda_compat.h",
  "activation/dispatch_utils.h",
]