Sync capabilities with upstream
Browse files - build.toml +6 -6
build.toml
CHANGED
@@ -11,7 +11,7 @@ src = [
|
|
11 |
include = [ "." ]
|
12 |
|
13 |
[kernel.cutlass_w8a8]
|
14 |
-
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "…
|
15 |
src = [
|
16 |
"core/math.hpp",
|
17 |
"cutlass_w8a8/common.hpp",
|
@@ -47,7 +47,7 @@ depends = [ "cutlass_3_6", "torch" ]
|
|
47 |
|
48 |
[kernel.fp8_common]
|
49 |
language = "cuda-hipify"
|
50 |
-
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "…
|
51 |
rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ]
|
52 |
src = [
|
53 |
"fp8/amd/hip_float8.h",
|
@@ -61,7 +61,7 @@ include = [ "." ]
|
|
61 |
depends = [ "torch" ]
|
62 |
|
63 |
[kernel.fp8_marlin]
|
64 |
-
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "…
|
65 |
src = [
|
66 |
"fp8/fp8_marlin.cu",
|
67 |
"gptq_marlin/marlin.cuh",
|
@@ -71,7 +71,7 @@ depends = [ "torch" ]
|
|
71 |
|
72 |
[kernel.int8_common]
|
73 |
language = "cuda-hipify"
|
74 |
-
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "…
|
75 |
rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ]
|
76 |
src = [
|
77 |
"compressed_tensors/int8_quant_kernels.cu",
|
@@ -81,7 +81,7 @@ include = [ "." ]
|
|
81 |
depends = [ "torch" ]
|
82 |
|
83 |
[kernel.gptq_marlin]
|
84 |
-
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "…
|
85 |
src = [
|
86 |
"core/scalar_type.hpp",
|
87 |
"gptq_marlin/awq_marlin_repack.cu",
|
@@ -94,7 +94,7 @@ include = [ "." ]
|
|
94 |
depends = [ "torch" ]
|
95 |
|
96 |
[kernel.marlin]
|
97 |
-
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "…
|
98 |
src = [
|
99 |
"core/scalar_type.hpp",
|
100 |
"marlin/dense/common/base.h",
|
|
|
11 |
include = [ "." ]
|
12 |
|
13 |
[kernel.cutlass_w8a8]
|
14 |
+
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
|
15 |
src = [
|
16 |
"core/math.hpp",
|
17 |
"cutlass_w8a8/common.hpp",
|
|
|
47 |
|
48 |
[kernel.fp8_common]
|
49 |
language = "cuda-hipify"
|
50 |
+
cuda-capabilities = [ "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
|
51 |
rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ]
|
52 |
src = [
|
53 |
"fp8/amd/hip_float8.h",
|
|
|
61 |
depends = [ "torch" ]
|
62 |
|
63 |
[kernel.fp8_marlin]
|
64 |
+
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
|
65 |
src = [
|
66 |
"fp8/fp8_marlin.cu",
|
67 |
"gptq_marlin/marlin.cuh",
|
|
|
71 |
|
72 |
[kernel.int8_common]
|
73 |
language = "cuda-hipify"
|
74 |
+
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
|
75 |
rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ]
|
76 |
src = [
|
77 |
"compressed_tensors/int8_quant_kernels.cu",
|
|
|
81 |
depends = [ "torch" ]
|
82 |
|
83 |
[kernel.gptq_marlin]
|
84 |
+
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
|
85 |
src = [
|
86 |
"core/scalar_type.hpp",
|
87 |
"gptq_marlin/awq_marlin_repack.cu",
|
|
|
94 |
depends = [ "torch" ]
|
95 |
|
96 |
[kernel.marlin]
|
97 |
+
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
|
98 |
src = [
|
99 |
"core/scalar_type.hpp",
|
100 |
"marlin/dense/common/base.h",
|