# Build configuration (TOML).
# The [general] table holds project-wide settings; the [torch] and [kernel.*]
# tables each list the source files that make up one build component.
# NOTE(review): the layout looks like a Hugging Face kernel-builder
# `build.toml` -- confirm against the tool that actually consumes this file.

[general]
# Project name.
name = "moe"

[torch]
# Sources for the Torch binding layer: the binding translation unit and its
# header under torch-ext/, plus one shared header from core/.
src = [
  "core/scalar_type.hpp",
  "torch-ext/torch_binding.cpp",
  "torch-ext/torch_binding.h",
]
# Include search path(s); "." is presumably relative to the directory that
# contains this file -- TODO confirm.
include = ["."]
# NOTE(review): presumably the file extensions of Python-side files to ship
# with the extension -- confirm against the build tool's documentation.
pyext = ["py", "json"]

[kernel.fp8]
# CUDA compute capabilities listed for this kernel (7.0 through 9.0).
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
# Files belonging to the fp8 kernel: two top-level helper headers, the
# AMD/HIP float8 headers, and the fp8 CUDA source with its .cuh headers.
src = [
  "cuda_compat.h",
  "dispatch_utils.h",
  "fp8/amd/hip_float8.h",
  "fp8/amd/hip_float8_impl.h",
  "fp8/common.cu",
  "fp8/common.cuh",
  "fp8/vectorization.cuh",
]
# Include search path(s); "." is presumably relative to the directory that
# contains this file -- TODO confirm.
include = ["."]
# Names the [torch] section as a dependency of this kernel.
depends = ["torch"]

[kernel.moe]
# CUDA compute capabilities listed for this kernel (7.0 through 9.0).
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
# Files belonging to the moe kernel: two top-level helper headers and the
# CUDA sources/headers under moe/.
# NOTE(review): unlike [kernel.fp8] and [kernel.moe-marlin], this section sets
# no `include` key -- confirm that is intentional.
src = [
  "cuda_compat.h",
  "dispatch_utils.h",
  "moe/moe_align_sum_kernels.cu",
  "moe/moe_wna16.cu",
  "moe/moe_wna16_utils.h",
  "moe/topk_softmax_kernels.cu",
]
# Names the [torch] section as a dependency of this kernel.
depends = ["torch"]

[kernel.moe-marlin]
# CUDA compute capabilities listed for this kernel. The list starts at 8.0,
# whereas the other kernel sections in this file start at 7.0.
cuda-capabilities = ["8.0", "8.6", "8.7", "8.9", "9.0"]
# Files belonging to the Marlin MoE kernel: shared headers from core/, the ops
# entry point, and the per-variant kernel sources/headers under
# marlin-moe/marlin_kernels/.
src = [
  "core/exception.hpp",
  "core/scalar_type.hpp",
  "marlin-moe/marlin_moe_ops.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.h",
]
# Include search path(s); "." is presumably relative to the directory that
# contains this file -- TODO confirm.
include = ["."]
# Names the [torch] section as a dependency of this kernel.
depends = ["torch"]

[kernel.activation]
# CUDA compute capabilities listed for this kernel (7.0 through 9.0).
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
# Files belonging to the activation kernel. The helper headers referenced here
# live under activation/, not at the top level as in [kernel.fp8] and
# [kernel.moe].
src = [
  "activation/activation_kernels.cu",
  "activation/cuda_compat.h",
  "activation/dispatch_utils.h",
]
# Names the [torch] section as a dependency of this kernel.
depends = ["torch"]