File size: 7,578 Bytes
5a29263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# The OpenCL headers and library are needed to build the backend itself.
find_package(OpenCL REQUIRED)

# The Python interpreter is only used to run the kernel-embedding script, so
# it is only a hard requirement when kernels are embedded into the binary.
if (GGML_OPENCL_EMBED_KERNELS)
    find_package(Python3 REQUIRED)
endif ()

set(TARGET_NAME ggml-opencl)

# Create the backend library target from the backend source and its public
# header, then wire in the OpenCL library and include paths found above.
ggml_add_backend_library(${TARGET_NAME}
                         ggml-opencl.cpp
                         ../../include/ggml-opencl.h)
target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCL_LIBRARIES})
target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_INCLUDE_DIRS})

# Feature macros for the backend sources. They are attached to the backend
# target (PRIVATE) instead of the directory so they cannot leak into any other
# target that might be defined in this directory.
# NOTE(review): assumes ${TARGET_NAME} is the only target that needs these
# definitions - confirm against ggml_add_backend_library.

# Optional: define GGML_OPENCL_PROFILING when the option of the same name is on.
if (GGML_OPENCL_PROFILING)
    message(STATUS "OpenCL profiling enabled (increases CPU overhead)")
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_PROFILING)
endif ()

# Defined unconditionally for this backend.
target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_SOA_Q)

# Optional: define GGML_OPENCL_USE_ADRENO_KERNELS when the option is on.
if (GGML_OPENCL_USE_ADRENO_KERNELS)
    message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_USE_ADRENO_KERNELS)
endif ()

# OpenCL kernel sources used by the backend, listed without the ".cl" suffix.
# Each entry corresponds to kernels/<name>.cl in this directory. Add new
# kernels here; both branches below iterate over this list.
set(GGML_OPENCL_KERNELS
    ggml-opencl
    ggml-opencl_mm
    ggml-opencl_cvt
    ggml-opencl_gemv_noshuffle
    ggml-opencl_gemv_noshuffle_general
    ggml-opencl_mul_mat_Ab_Bi_8x4
    ggml-opencl_transpose_16
    ggml-opencl_transpose_32
    ggml-opencl_transpose_32_16
)

if (GGML_OPENCL_EMBED_KERNELS)
    # Embed mode: each kernel is converted into a generated header
    # (<name>.cl.h) by embed_kernel.py and added to the backend target.
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_EMBED_KERNELS)

    set(EMBED_KERNEL_SCRIPT  "${CMAKE_CURRENT_SOURCE_DIR}/kernels/embed_kernel.py")
    set(OPENCL_AUTOGEN_DIR   "${CMAKE_BINARY_DIR}/autogenerated")
    file(MAKE_DIRECTORY      "${OPENCL_AUTOGEN_DIR}")

    # Make the generated headers visible to the backend target only.
    target_include_directories(${TARGET_NAME} PRIVATE "${OPENCL_AUTOGEN_DIR}")

    foreach (KERNEL_NAME IN LISTS GGML_OPENCL_KERNELS)
        set(KERNEL_SRC "${CMAKE_CURRENT_SOURCE_DIR}/kernels/${KERNEL_NAME}.cl")
        set(KERNEL_HDR "${OPENCL_AUTOGEN_DIR}/${KERNEL_NAME}.cl.h")

        # Python3_EXECUTABLE is provided by find_package(Python3).
        # Dependencies are absolute paths so the header is regenerated when
        # either the kernel source or the embedding script changes.
        add_custom_command(
            OUTPUT  "${KERNEL_HDR}"
            COMMAND "${Python3_EXECUTABLE}" "${EMBED_KERNEL_SCRIPT}"
                    "${KERNEL_SRC}"
                    "${KERNEL_HDR}"
            DEPENDS "${KERNEL_SRC}" "${EMBED_KERNEL_SCRIPT}"
            COMMENT "Generate ${KERNEL_NAME}.cl.h"
            VERBATIM
        )

        # Listing the header as a source ties the custom command to the target.
        target_sources(${TARGET_NAME} PRIVATE "${KERNEL_HDR}")
    endforeach ()
else ()
    # Non-embed mode: copy every kernel source into the runtime output
    # directory at configure time.
    set(OPENCL_KERNEL_DEST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
    if ("${OPENCL_KERNEL_DEST_DIR}" STREQUAL "")
        # Without this fallback an unset CMAKE_RUNTIME_OUTPUT_DIRECTORY would
        # make the destination "/<name>.cl" (filesystem root). The current
        # binary dir is where CMake places runtime artifacts by default.
        set(OPENCL_KERNEL_DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}")
    endif ()

    foreach (KERNEL_NAME IN LISTS GGML_OPENCL_KERNELS)
        configure_file("${CMAKE_CURRENT_SOURCE_DIR}/kernels/${KERNEL_NAME}.cl"
                       "${OPENCL_KERNEL_DEST_DIR}/${KERNEL_NAME}.cl"
                       COPYONLY)
    endforeach ()
endif ()