YZ-TAN's picture
Upload 2821 files
5a29263 verified
raw
history blame
15.4 kB
function(ggml_add_cpu_backend_variant_impl tag_name)
if (tag_name)
set(GGML_CPU_NAME ggml-cpu-${tag_name})
else()
set(GGML_CPU_NAME ggml-cpu)
endif()
ggml_add_backend_library(${GGML_CPU_NAME})
list (APPEND GGML_CPU_SOURCES
ggml-cpu/ggml-cpu.c
ggml-cpu/ggml-cpu.cpp
ggml-cpu/ggml-cpu-aarch64.cpp
ggml-cpu/ggml-cpu-aarch64.h
ggml-cpu/ggml-cpu-hbm.cpp
ggml-cpu/ggml-cpu-hbm.h
ggml-cpu/ggml-cpu-quants.c
ggml-cpu/ggml-cpu-quants.h
ggml-cpu/ggml-cpu-traits.cpp
ggml-cpu/ggml-cpu-traits.h
ggml-cpu/amx/amx.cpp
ggml-cpu/amx/amx.h
ggml-cpu/amx/mmq.cpp
ggml-cpu/amx/mmq.h
ggml-cpu/ggml-cpu-impl.h
)
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)
message(STATUS "Accelerate framework found")
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
else()
message(WARNING "Accelerate framework not found")
endif()
endif()
if (GGML_OPENMP)
find_package(OpenMP)
if (OpenMP_FOUND)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
else()
message(WARNING "OpenMP not found")
endif()
endif()
if (GGML_LLAMAFILE)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)
list(APPEND GGML_CPU_SOURCES
ggml-cpu/llamafile/sgemm.cpp
ggml-cpu/llamafile/sgemm.h)
endif()
if (GGML_CPU_HBM)
find_library(memkind memkind REQUIRED)
message(STATUS "Using memkind for CPU HBM")
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
endif()
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
message(STATUS "ARM detected")
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
else()
check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
endif()
if (GGML_NATIVE)
# -mcpu=native does not always enable all the features in some compilers,
# so we check for them manually and enable them if available
execute_process(
COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
INPUT_FILE "/dev/null"
OUTPUT_QUIET
ERROR_VARIABLE ARM_MCPU
RESULT_VARIABLE ARM_MCPU_RESULT
)
if (NOT ARM_MCPU_RESULT)
string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
endif()
if ("${ARM_MCPU_FLAG}" STREQUAL "")
set(ARM_MCPU_FLAG -mcpu=native)
message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
endif()
include(CheckCXXSourceRuns)
function(check_arm_feature tag code)
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
check_cxx_source_runs(
"${code}"
GGML_MACHINE_SUPPORTS_${tag}
)
if (GGML_MACHINE_SUPPORTS_${tag})
set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE)
else()
set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE)
endif()
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
endfunction()
check_arm_feature(dotprod "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
check_arm_feature(i8mm "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
check_arm_feature(sve "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
else()
if (GGML_CPU_ARM_ARCH)
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
endif()
endif()
# show enabled features
if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
set(FEAT_INPUT_FILE "NUL")
else()
set(FEAT_INPUT_FILE "/dev/null")
endif()
execute_process(
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
INPUT_FILE ${FEAT_INPUT_FILE}
OUTPUT_VARIABLE ARM_FEATURE
RESULT_VARIABLE ARM_FEATURE_RESULT
)
if (ARM_FEATURE_RESULT)
message(WARNING "Failed to get ARM features")
else()
foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC)
string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
if (NOT ${feature_pos} EQUAL -1)
message(STATUS "ARM feature ${feature} enabled")
endif()
endforeach()
endif()
endif()
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
message(STATUS "x86 detected")
if (MSVC)
# instruction set detection for MSVC only
if (GGML_NATIVE)
include(ggml-cpu/cmake/FindSIMD.cmake)
endif ()
if (GGML_AVX512)
list(APPEND ARCH_FLAGS /arch:AVX512)
# /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
# MSVC has no compile-time flags enabling specific
# AVX512 extensions, neither it defines the
# macros corresponding to the extensions.
# Do it manually.
list(APPEND ARCH_DEFINITIONS GGML_AVX512)
if (GGML_AVX512_VBMI)
list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
list(APPEND ARCH_FLAGS -mavx512vbmi)
endif()
endif()
if (GGML_AVX512_VNNI)
list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
list(APPEND ARCH_FLAGS -mavx512vnni)
endif()
endif()
if (GGML_AVX512_BF16)
list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
list(APPEND ARCH_FLAGS -mavx512bf16)
endif()
endif()
if (GGML_AMX_TILE)
list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
endif()
if (GGML_AMX_INT8)
list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
endif()
if (GGML_AMX_BF16)
list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
endif()
elseif (GGML_AVX2)
list(APPEND ARCH_FLAGS /arch:AVX2)
list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
elseif (GGML_AVX)
list(APPEND ARCH_FLAGS /arch:AVX)
list(APPEND ARCH_DEFINITIONS GGML_AVX)
else ()
list(APPEND ARCH_FLAGS /arch:SSE4.2)
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
endif()
if (GGML_AVX_VNNI)
list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
endif()
else ()
if (GGML_NATIVE)
list(APPEND ARCH_FLAGS -march=native)
else ()
list(APPEND ARCH_FLAGS -msse4.2)
list(APPEND ARCH_DEFINITIONS GGML_SSE42)
if (GGML_F16C)
list(APPEND ARCH_FLAGS -mf16c)
list(APPEND ARCH_DEFINITIONS GGML_F16C)
endif()
if (GGML_FMA)
list(APPEND ARCH_FLAGS -mfma)
list(APPEND ARCH_DEFINITIONS GGML_FMA)
endif()
if (GGML_AVX)
list(APPEND ARCH_FLAGS -mavx)
list(APPEND ARCH_DEFINITIONS GGML_AVX)
endif()
if (GGML_AVX2)
list(APPEND ARCH_FLAGS -mavx2)
list(APPEND ARCH_DEFINITIONS GGML_AVX2)
endif()
if (GGML_AVX_VNNI)
list(APPEND ARCH_FLAGS -mavxvnni)
list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
endif()
if (GGML_AVX512)
list(APPEND ARCH_FLAGS -mavx512f)
list(APPEND ARCH_FLAGS -mavx512cd)
list(APPEND ARCH_FLAGS -mavx512vl)
list(APPEND ARCH_FLAGS -mavx512dq)
list(APPEND ARCH_FLAGS -mavx512bw)
list(APPEND ARCH_DEFINITIONS GGML_AVX512)
endif()
if (GGML_AVX512_VBMI)
list(APPEND ARCH_FLAGS -mavx512vbmi)
list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
endif()
if (GGML_AVX512_VNNI)
list(APPEND ARCH_FLAGS -mavx512vnni)
list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
endif()
if (GGML_AVX512_BF16)
list(APPEND ARCH_FLAGS -mavx512bf16)
list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
endif()
if (GGML_AMX_TILE)
list(APPEND ARCH_FLAGS -mamx-tile)
list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
endif()
if (GGML_AMX_INT8)
list(APPEND ARCH_FLAGS -mamx-int8)
list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
endif()
if (GGML_AMX_BF16)
list(APPEND ARCH_FLAGS -mamx-bf16)
list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
endif()
endif()
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
message(STATUS "PowerPC detected")
execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
string(FIND "${POWER10_M}" "POWER10" substring_index)
if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "")
set(substring_index -1)
endif()
if (${substring_index} GREATER_EQUAL 0)
list(APPEND ARCH_FLAGS -mcpu=power10)
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
else()
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
message(STATUS "loongarch64 detected")
list(APPEND ARCH_FLAGS -march=loongarch64)
if (GGML_LASX)
list(APPEND ARCH_FLAGS -mlasx)
endif()
if (GGML_LSX)
list(APPEND ARCH_FLAGS -mlsx)
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
message(STATUS "RISC-V detected")
if (GGML_RVV)
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
endif()
else()
message(STATUS "Unknown architecture")
endif()
if (GGML_CPU_AARCH64)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
endif()
message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
if (GGML_BACKEND_DL)
if (GGML_NATIVE)
# the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
endif()
# The feature detection code is compiled as a separate target so that
# it can be built without the architecture flags
# Since multiple variants of the CPU backend may be included in the same
# build, using set_source_files_properties() to set the arch flags is not possible
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
endif()
if (EMSCRIPTEN)
set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
endif()
endfunction()