Spaces:
Build error
Build error
# Build script for the `ggml-cuda` backend target.
#
# Steps performed, in order:
#   1. locate the CUDA Toolkit (configuration aborts when it is missing),
#   2. pick CMAKE_CUDA_ARCHITECTURES unless the caller already defined it,
#   3. glob the CUDA headers/sources and create the backend library,
#   4. turn the GGML_CUDA_* variables into preprocessor definitions,
#   5. link the CUDA runtime, cuBLAS and (unless VMM is disabled) the driver,
#   6. assemble the nvcc flags, optionally probing the host compiler so that
#      warning flags can be forwarded to it through -Xcompiler.
#
# Expected from the enclosing project (not defined in this file):
#   - the GGML_* variables tested below,
#   - ggml_add_backend_library() and ggml_get_flags(),
#   - CXX_FLAGS; GF_CXX_FLAGS is presumably produced by ggml_get_flags()
#     (TODO confirm against its definition).
cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES

find_package(CUDAToolkit)

if (CUDAToolkit_FOUND)
    message(STATUS "CUDA Toolkit found")

    # Only choose architectures when the user/toolchain has not done so already.
    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
        # native == GPUs available at build time
        # 52     == Maxwell, lowest CUDA 12 standard
        # 60     == P100, FP16 CUDA intrinsics
        # 61     == Pascal, __dp4a instruction (per-byte integer dot product)
        # 70     == V100, FP16 tensor cores
        # 75     == Turing, int8 tensor cores
        if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
            set(CMAKE_CUDA_ARCHITECTURES "native")
        elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
            set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
        else()
            set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
        endif()
    endif()
    message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

    # Must come after CMAKE_CUDA_ARCHITECTURES has been decided above.
    enable_language(CUDA)

    # Headers: every *.cuh in this directory plus the public backend header.
    file(GLOB   GGML_HEADERS_CUDA "*.cuh")
    list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h")

    # Sources: every *.cu in this directory plus selected template instances.
    file(GLOB   GGML_SOURCES_CUDA "*.cu")
    file(GLOB   SRCS "template-instances/fattn-mma*.cu")
    list(APPEND GGML_SOURCES_CUDA ${SRCS})
    file(GLOB   SRCS "template-instances/mmq*.cu")
    list(APPEND GGML_SOURCES_CUDA ${SRCS})

    if (GGML_CUDA_FA_ALL_QUANTS)
        # Every fattn-vec template instance is compiled.
        file(GLOB   SRCS "template-instances/fattn-vec*.cu")
        list(APPEND GGML_SOURCES_CUDA ${SRCS})
        add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
    else()
        # Only the q4_0/q4_0, q8_0/q8_0 and f16/f16 fattn-vec instances.
        file(GLOB   SRCS "template-instances/fattn-vec*q4_0-q4_0.cu")
        list(APPEND GGML_SOURCES_CUDA ${SRCS})
        file(GLOB   SRCS "template-instances/fattn-vec*q8_0-q8_0.cu")
        list(APPEND GGML_SOURCES_CUDA ${SRCS})
        file(GLOB   SRCS "template-instances/fattn-vec*f16-f16.cu")
        list(APPEND GGML_SOURCES_CUDA ${SRCS})
    endif()

    # Helper defined outside this file; the statements below rely on it
    # creating a target named `ggml-cuda`.
    ggml_add_backend_library(ggml-cuda
                             ${GGML_HEADERS_CUDA}
                             ${GGML_SOURCES_CUDA}
                            )

    # Map build variables onto preprocessor definitions (directory scope).
    add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})

    if (GGML_CUDA_GRAPHS)
        add_compile_definitions(GGML_CUDA_USE_GRAPHS)
    endif()

    if (GGML_CUDA_FORCE_MMQ)
        add_compile_definitions(GGML_CUDA_FORCE_MMQ)
    endif()

    if (GGML_CUDA_FORCE_CUBLAS)
        add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
    endif()

    if (GGML_CUDA_NO_VMM)
        add_compile_definitions(GGML_CUDA_NO_VMM)
    endif()

    # GGML_CUDA_DMMV_F16 is accepted as another spelling of GGML_CUDA_F16.
    if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
        add_compile_definitions(GGML_CUDA_F16)
    endif()

    if (GGML_CUDA_NO_PEER_COPY)
        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
    endif()

    # CUDA runtime and cuBLAS: static variants for static builds where available.
    if (GGML_STATIC)
        if (WIN32)
            # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
            target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
        else ()
            target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
        endif()
    else()
        target_link_libraries(ggml-cuda PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
    endif()

    # When no VMM is requested there is no need to link directly with the
    # cuda driver lib (libcuda.so); otherwise link it.
    if (NOT GGML_CUDA_NO_VMM)
        target_link_libraries(ggml-cuda PRIVATE CUDA::cuda_driver)
    endif()

    # CUDA_CXX_FLAGS: flags meant for the host compiler (forwarded via -Xcompiler).
    # CUDA_FLAGS:     flags handed to nvcc itself.
    set(CUDA_CXX_FLAGS "")

    set(CUDA_FLAGS -use_fast_math)

    if (GGML_FATAL_WARNINGS)
        list(APPEND CUDA_FLAGS -Werror all-warnings)
    endif()

    if (GGML_ALL_WARNINGS AND NOT MSVC)
        # Ask nvcc's host compiler to identify itself so that matching warning
        # flags can be requested from ggml_get_flags().
        # NOTE(review): `.c` looks like a placeholder input name that makes nvcc
        # invoke the host compiler -- confirm.
        set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)

        # The expansion is quoted on purpose: with a bare variable name, an
        # undefined CMAKE_CUDA_HOST_COMPILER is compared as the literal text
        # "CMAKE_CUDA_HOST_COMPILER", the test passes, and a `-ccbin` without a
        # value ends up in the command.
        if (NOT "${CMAKE_CUDA_HOST_COMPILER}" STREQUAL "")
            list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
        endif()

        execute_process(
            COMMAND ${NVCC_CMD} -Xcompiler --version
            OUTPUT_VARIABLE CUDA_CCFULLVER
            OUTPUT_STRIP_TRAILING_WHITESPACE
            ERROR_QUIET
        )

        if (NOT CUDA_CCFULLVER MATCHES "clang")
            # Anything whose banner does not mention clang is treated as GNU.
            set(CUDA_CCID "GNU")
            # Strip the trailing newline so it does not leak into the version
            # string that is logged and passed on below.
            execute_process(
                COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
                OUTPUT_VARIABLE CUDA_CCVER
                OUTPUT_STRIP_TRAILING_WHITESPACE
                ERROR_QUIET
            )
        else()
            if (CUDA_CCFULLVER MATCHES "Apple")
                set(CUDA_CCID "AppleClang")
            else()
                set(CUDA_CCID "Clang")
            endif()
            # Extract the number following " version " from the banner. The input
            # is quoted so that a `;` in the banner cannot split it into several
            # arguments.
            string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER "${CUDA_CCFULLVER}")
        endif()

        message(STATUS "CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")

        ggml_get_flags(${CUDA_CCID} ${CUDA_CCVER})
        list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS})  # This is passed to -Xcompiler later
    endif()

    if (NOT MSVC)
        list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
    endif()

    list(JOIN   CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED)  # pass host compiler flags as a single argument

    if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
        list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
    endif()

    # Apply the collected flags to CUDA translation units only.
    target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
else()
    message(FATAL_ERROR "CUDA Toolkit not found")
endif()