> $GITHUB_ENV
+ echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
+ echo "EOF" >> $GITHUB_ENV
+
+ # Exports the first four entries of the imgur_step "imgur_urls" JSON output as
+ # environment variables for later steps. A failure here does not fail the job
+ # (continue-on-error).
+ # NOTE(review): the first variable is spelled IMAGE_O (capital letter O), not
+ # IMAGE_0; its consumers are not visible in this chunk - confirm before renaming.
+ - name: Extract image url
+ id: extract_image_url
+ continue-on-error: true
+ run: |
+ set -eux
+
+ echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
+ echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
+ echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
+ echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
+
+ # Posts the benchmark summary as a pull-request comment through
+ # mshick/add-pr-comment. Runs only when the event carries a pull_request
+ # payload and the matrix entry sets pr_comment_enabled to 'true'. The
+ # message-id is built from the job, runner label, model and ftype.
+ # NOTE(review): the message body below looks like it lost markup in this view
+ # (long runs of blank lines, bare "More"/"Details" lines) - compare against
+ # the upstream workflow before editing it.
+ - name: Comment PR
+ uses: mshick/add-pr-comment@v2
+ id: comment_pr
+ if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
+ with:
+ message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+ message: |
+
+
+ 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+
+
+
+
+
+ Expand details for performance related PR only
+
+ - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
+ - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
+ - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
+ - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
+ - ${{ env.BENCH_GRAPH_XLABEL }}
+
+
+
+
+
+
+
+
+ More
+
+ ```mermaid
+ ${{ env.PROMPT_TOKENS_SECONDS }}
+ ```
+
+
+
+
+
+
+ More
+
+ ```mermaid
+ ${{ env.PREDICTED_TOKENS_SECONDS }}
+ ```
+
+
+
+
+
+
+
+ Details
+
+
+
+
+
+
+ More
+
+ ```mermaid
+ ${{ env.KV_CACHE_USAGE_RATIO }}
+ ```
+
+
+
+
+
+
+ More
+
+ ```mermaid
+ ${{ env.REQUESTS_PROCESSING }}
+ ```
+
+
+
+
+
+
diff --git a/llama.cpp/.github/workflows/build.yml b/llama.cpp/.github/workflows/build.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9e4cb5eae164dc81e13235b7650bbf3cf2502d8b
--- /dev/null
+++ b/llama.cpp/.github/workflows/build.yml
@@ -0,0 +1,1645 @@
+# Top-level settings of the CI workflow: triggers, run concurrency, token
+# permissions and environment defaults shared by the jobs.
+name: CI
+
+on:
+ workflow_dispatch: # allows manual triggering
+ inputs:
+ create_release:
+ description: 'Create new release'
+ required: true
+ type: boolean
+ # Pushes to master and pull requests trigger the workflow only when this
+ # file, a build script or a source file matching the globs below changes.
+ push:
+ branches:
+ - master
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+ pull_request:
+ types: [opened, synchronize, reopened]
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+
+# Runs are grouped by workflow plus the ref when head_ref is set (pull
+# requests), otherwise by the unique run id; a newer run in the same group
+# cancels the one still in progress.
+concurrency:
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+ cancel-in-progress: true
+
+# Fine-grained permissions
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+ contents: write # for creating release
+
+env:
+ # Branch being built: the pull-request head branch when set, else the ref name.
+ BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+ GGML_NLOOP: 3
+ GGML_N_THREADS: 1
+ LLAMA_LOG_COLORS: 1
+ LLAMA_LOG_PREFIX: 1
+ LLAMA_LOG_TIMESTAMPS: 1
+
+jobs:
+ # Native CMake build and test on the macos-14 runner. On pushes to master, or
+ # when a release is requested through workflow_dispatch, the binaries are
+ # zipped and uploaded as an artifact.
+ macOS-latest-cmake-arm64:
+ runs-on: macos-14
+
+ steps:
+ # Full history (fetch-depth: 0) is needed by the commit count in the tag step.
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: macOS-latest-cmake-arm64
+ evict-old-files: 1d
+
+ # Best effort: a failing "brew update" does not fail the job.
+ - name: Dependencies
+ id: depends
+ continue-on-error: true
+ run: |
+ brew update
+
+ - name: Build
+ id: cmake_build
+ run: |
+ sysctl -a
+ cmake -B build \
+ -DCMAKE_BUILD_RPATH="@loader_path" \
+ -DLLAMA_FATAL_WARNINGS=ON \
+ -DLLAMA_CURL=ON \
+ -DGGML_METAL_USE_BF16=ON \
+ -DGGML_METAL_EMBED_LIBRARY=ON \
+ -DGGML_RPC=ON
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L 'main|curl' --verbose --timeout 900
+
+ # Output "name" is b<commit count> on master, otherwise
+ # <branch with '/' replaced by '-'>-b<commit count>-<7-char hash>.
+ # BRANCH_NAME is read from the process environment (workflow-level env)
+ # rather than expanded into the script text by a workflow expression, so a
+ # crafted branch name cannot inject shell commands.
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ if [[ "${BRANCH_NAME}" == "master" ]]; then
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+ else
+ SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+ fi
+
+ # Packaging and upload run only for pushes to master or an explicit release request.
+ - name: Pack artifacts
+ id: pack_artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ run: |
+ cp LICENSE ./build/bin/
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
+
+ - name: Upload artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
+ name: llama-bin-macos-arm64.zip
+
+ # Native CMake build and test on the macos-13 runner with Metal disabled. On
+ # pushes to master, or when a release is requested through workflow_dispatch,
+ # the binaries are zipped and uploaded as an artifact.
+ macOS-latest-cmake-x64:
+ runs-on: macos-13
+
+ steps:
+ # Full history (fetch-depth: 0) is needed by the commit count in the tag step.
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: macOS-latest-cmake-x64
+ evict-old-files: 1d
+
+ # Best effort: a failing "brew update" does not fail the job.
+ - name: Dependencies
+ id: depends
+ continue-on-error: true
+ run: |
+ brew update
+
+ - name: Build
+ id: cmake_build
+ run: |
+ sysctl -a
+ # Metal is disabled due to intermittent failures with Github runners not having a GPU:
+ # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
+ cmake -B build \
+ -DCMAKE_BUILD_RPATH="@loader_path" \
+ -DLLAMA_FATAL_WARNINGS=ON \
+ -DLLAMA_CURL=ON \
+ -DGGML_METAL=OFF \
+ -DGGML_RPC=ON
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose --timeout 900
+
+ # Output "name" is b<commit count> on master, otherwise
+ # <branch with '/' replaced by '-'>-b<commit count>-<7-char hash>.
+ # BRANCH_NAME is read from the process environment (workflow-level env)
+ # rather than expanded into the script text by a workflow expression, so a
+ # crafted branch name cannot inject shell commands.
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ if [[ "${BRANCH_NAME}" == "master" ]]; then
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+ else
+ SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+ fi
+
+ # Packaging and upload run only for pushes to master or an explicit release request.
+ - name: Pack artifacts
+ id: pack_artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ run: |
+ cp LICENSE ./build/bin/
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
+
+ - name: Upload artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
+ name: llama-bin-macos-x64.zip
+
+ # CPU-only CMake build and test on ubuntu-22.04, followed by a llama2c
+ # conversion smoke test. On pushes to master, or when a release is requested
+ # through workflow_dispatch, the binaries are zipped and uploaded.
+ ubuntu-cpu-cmake:
+ runs-on: ubuntu-22.04
+
+ steps:
+ # Full history (fetch-depth: 0) is needed by the commit count in the tag step.
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-cpu-cmake
+ evict-old-files: 1d
+
+ - name: Dependencies
+ id: depends
+ run: |
+ sudo apt-get update
+ sudo apt-get install build-essential libcurl4-openssl-dev
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DLLAMA_FATAL_WARNINGS=ON \
+ -DLLAMA_CURL=ON \
+ -DGGML_RPC=ON
+ cmake --build build --config Release -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L 'main|curl' --verbose --timeout 900
+
+ # Downloads a small tokenizer and llama2c checkpoint, converts them to GGUF
+ # and runs llama-cli on the converted model.
+ - name: Test llama2c conversion
+ id: llama2c_test
+ run: |
+ cd build
+ echo "Fetch tokenizer"
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
+ echo "Fetch llama2c model"
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
+ ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
+ ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
+
+ # Output "name" is b<commit count> on master, otherwise
+ # <branch with '/' replaced by '-'>-b<commit count>-<7-char hash>.
+ # BRANCH_NAME is read from the process environment (workflow-level env)
+ # rather than expanded into the script text by a workflow expression, so a
+ # crafted branch name cannot inject shell commands.
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ if [[ "${BRANCH_NAME}" == "master" ]]; then
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+ else
+ SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+ fi
+
+ # Packaging and upload run only for pushes to master or an explicit release request.
+ - name: Pack artifacts
+ id: pack_artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ run: |
+ cp LICENSE ./build/bin/
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
+
+ - name: Upload artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
+ name: llama-bin-ubuntu-x64.zip
+
+ # Debug builds with one sanitizer per matrix entry (ADDRESS, THREAD,
+ # UNDEFINED), each followed by the "main" ctest label. The job is marked
+ # continue-on-error.
+ ubuntu-latest-cmake-sanitizer:
+ runs-on: ubuntu-latest
+
+ continue-on-error: true
+
+ strategy:
+ matrix:
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
+ build_type: [Debug]
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
+ evict-old-files: 1d
+
+ - name: Dependencies
+ id: depends
+ run: |
+ sudo apt-get update
+ sudo apt-get install build-essential
+
+ # Exactly one of the two build steps runs: this one for every sanitizer
+ # except THREAD, the next one (OpenMP disabled) for THREAD only.
+ - name: Build
+ id: cmake_build
+ if: ${{ matrix.sanitizer != 'THREAD' }}
+ run: |
+ cmake -B build \
+ -DLLAMA_FATAL_WARNINGS=ON \
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
+
+ - name: Build (no OpenMP)
+ id: cmake_build_no_openmp
+ if: ${{ matrix.sanitizer == 'THREAD' }}
+ run: |
+ cmake -B build \
+ -DLLAMA_FATAL_WARNINGS=ON \
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+ -DGGML_OPENMP=OFF
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose --timeout 900
+
+ # Release build with LLAMA_LLGUIDANCE enabled, followed by the "main" ctest label.
+ ubuntu-latest-llguidance:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Dependencies
+ id: depends
+ run: |
+ sudo apt-get update
+ sudo apt-get install build-essential
+
+ # Configures from inside the build directory (cmake ..) rather than with -B.
+ - name: Build
+ id: cmake_build
+ run: |
+ mkdir build
+ cd build
+ cmake .. \
+ -DLLAMA_FATAL_WARNINGS=ON \
+ -DLLAMA_LLGUIDANCE=ON
+ cmake --build . --config Release -j $(nproc)
+
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose --timeout 900
+
+ # Release build with GGML_RPC enabled, followed by the "main" ctest label.
+ # The job is marked continue-on-error.
+ ubuntu-latest-cmake-rpc:
+ runs-on: ubuntu-latest
+
+ continue-on-error: true
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-latest-cmake-rpc
+ evict-old-files: 1d
+
+ - name: Dependencies
+ id: depends
+ run: |
+ sudo apt-get update
+ sudo apt-get install build-essential
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DGGML_RPC=ON
+ cmake --build build --config Release -j $(nproc)
+
+ # NOTE(review): unlike the other test steps in this file, no --timeout is
+ # passed to ctest here - confirm whether that is intentional.
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ ctest -L main --verbose
+
+ # Release build with GGML_VULKAN enabled on ubuntu-22.04, using the Vulkan SDK
+ # from the LunarG apt repository, followed by the "main" ctest label.
+ ubuntu-22-cmake-vulkan:
+ runs-on: ubuntu-22.04
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-22-cmake-vulkan
+ evict-old-files: 1d
+
+ # Registers the LunarG signing key and the jammy package list, then installs
+ # the Mesa Vulkan drivers and the Vulkan SDK.
+ - name: Dependencies
+ id: depends
+ run: |
+ wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
+ sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+ sudo apt-get update -y
+ sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -B build \
+ -DGGML_VULKAN=ON
+ cmake --build build --config Release -j $(nproc)
+
+ # Uses a longer ctest timeout (1800 s) than the other jobs.
+ - name: Test
+ id: cmake_test
+ run: |
+ cd build
+ # This is using llvmpipe and runs slower than other backends
+ ctest -L main --verbose --timeout 1800
+
+ # Build-only job inside the rocm/dev-ubuntu-22.04:6.0.2 container. GGML_HIP is
+ # built twice, once per compiler setup, into separate build directories; no
+ # tests are run.
+ ubuntu-22-cmake-hip:
+ runs-on: ubuntu-22.04
+ container: rocm/dev-ubuntu-22.04:6.0.2
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: Dependencies
+ id: depends
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-22-cmake-hip
+ evict-old-files: 1d
+
+ # Sets CMAKE_HIP_COMPILER to the clang located through "hipconfig -l".
+ - name: Build with native CMake HIP support
+ id: cmake_build
+ run: |
+ cmake -B build -S . \
+ -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
+ -DGGML_HIP=ON
+ cmake --build build --config Release -j $(nproc)
+
+ # Uses hipcc as both the C and C++ compiler; builds into build2.
+ - name: Build with legacy HIP support
+ id: cmake_build_legacy_hip
+ run: |
+ cmake -B build2 -S . \
+ -DCMAKE_C_COMPILER=hipcc \
+ -DCMAKE_CXX_COMPILER=hipcc \
+ -DGGML_HIP=ON
+ cmake --build build2 --config Release -j $(nproc)
+
+ # Build-only job inside the mthreads/musa:rc3.1.0-devel-ubuntu22.04 container
+ # with GGML_MUSA enabled; no tests are run.
+ ubuntu-22-cmake-musa:
+ runs-on: ubuntu-22.04
+ container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ # apt-get is called without sudo in this job.
+ - name: Dependencies
+ id: depends
+ run: |
+ apt-get update
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-22-cmake-musa
+ evict-old-files: 1d
+
+ - name: Build with native CMake MUSA support
+ id: cmake_build
+ run: |
+ cmake -B build -S . \
+ -DGGML_MUSA=ON
+ cmake --build build --config Release -j $(nproc)
+
+ # Build-only job with GGML_SYCL enabled, compiled with the Intel oneAPI
+ # icx/icpx compilers installed from Intel's apt repository. The job is marked
+ # continue-on-error.
+ ubuntu-22-cmake-sycl:
+ runs-on: ubuntu-22.04
+
+ continue-on-error: true
+
+ steps:
+ # Single checkout for the whole job; the oneAPI setup steps below work in
+ # /tmp and through apt, so a second checkout later on is not needed.
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: add oneAPI to apt
+ shell: bash
+ run: |
+ cd /tmp
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+ - name: install oneAPI dpcpp compiler
+ shell: bash
+ run: |
+ sudo apt update
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+ - name: install oneAPI MKL library
+ shell: bash
+ run: |
+ sudo apt install intel-oneapi-mkl-devel
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-22-cmake-sycl
+ evict-old-files: 1d
+
+ # setvars.sh is sourced first so that icx/icpx are available to CMake.
+ - name: Build
+ id: cmake_build
+ run: |
+ source /opt/intel/oneapi/setvars.sh
+ cmake -B build \
+ -DGGML_SYCL=ON \
+ -DCMAKE_C_COMPILER=icx \
+ -DCMAKE_CXX_COMPILER=icpx
+ cmake --build build --config Release -j $(nproc)
+
+ # Build-only job with GGML_SYCL and GGML_SYCL_F16 enabled, compiled with the
+ # Intel oneAPI icx/icpx compilers installed from Intel's apt repository. The
+ # job is marked continue-on-error.
+ ubuntu-22-cmake-sycl-fp16:
+ runs-on: ubuntu-22.04
+
+ continue-on-error: true
+
+ steps:
+ # Single checkout for the whole job; the oneAPI setup steps below work in
+ # /tmp and through apt, so a second checkout later on is not needed.
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: add oneAPI to apt
+ shell: bash
+ run: |
+ cd /tmp
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+ - name: install oneAPI dpcpp compiler
+ shell: bash
+ run: |
+ sudo apt update
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+ - name: install oneAPI MKL library
+ shell: bash
+ run: |
+ sudo apt install intel-oneapi-mkl-devel
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-22-cmake-sycl-fp16
+ evict-old-files: 1d
+
+ # setvars.sh is sourced first so that icx/icpx are available to CMake.
+ - name: Build
+ id: cmake_build
+ run: |
+ source /opt/intel/oneapi/setvars.sh
+ cmake -B build \
+ -DGGML_SYCL=ON \
+ -DCMAKE_C_COMPILER=icx \
+ -DCMAKE_CXX_COMPILER=icpx \
+ -DGGML_SYCL_F16=ON
+ cmake --build build --config Release -j $(nproc)
+
+ # Build-only job: generates an Xcode project targeting iOS (deployment target
+ # 14.0) with examples, tests and server disabled, then builds it without code
+ # signing.
+ macOS-latest-cmake-ios:
+ runs-on: macos-latest
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: macOS-latest-cmake-ios
+ evict-old-files: 1d
+
+ # Best effort: a failing "brew update" does not fail the job.
+ - name: Dependencies
+ id: depends
+ continue-on-error: true
+ run: |
+ brew update
+
+ # Arguments after "--" are forwarded to the native build tool.
+ - name: Build
+ id: cmake_build
+ run: |
+ sysctl -a
+ cmake -B build -G Xcode \
+ -DGGML_METAL_USE_BF16=ON \
+ -DGGML_METAL_EMBED_LIBRARY=ON \
+ -DLLAMA_BUILD_EXAMPLES=OFF \
+ -DLLAMA_BUILD_TESTS=OFF \
+ -DLLAMA_BUILD_SERVER=OFF \
+ -DCMAKE_SYSTEM_NAME=iOS \
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+
+ # Build-only job: generates an Xcode project targeting tvOS (deployment target
+ # 14.0) with examples, tests and server disabled, then builds it without code
+ # signing.
+ macOS-latest-cmake-tvos:
+ runs-on: macos-latest
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: macOS-latest-cmake-tvos
+ evict-old-files: 1d
+
+ # Best effort: a failing "brew update" does not fail the job.
+ - name: Dependencies
+ id: depends
+ continue-on-error: true
+ run: |
+ brew update
+
+ # Arguments after "--" are forwarded to the native build tool.
+ - name: Build
+ id: cmake_build
+ run: |
+ sysctl -a
+ cmake -B build -G Xcode \
+ -DGGML_METAL_USE_BF16=ON \
+ -DGGML_METAL_EMBED_LIBRARY=ON \
+ -DLLAMA_BUILD_EXAMPLES=OFF \
+ -DLLAMA_BUILD_TESTS=OFF \
+ -DLLAMA_BUILD_SERVER=OFF \
+ -DCMAKE_SYSTEM_NAME=tvOS \
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+
+ # Builds and installs llama.cpp with CMake for arm64 and x86_64, then runs
+ # xcodebuild on the llama-Package scheme once per matrix destination (macOS,
+ # iOS, tvOS).
+ macOS-latest-swift:
+ runs-on: macos-latest
+
+ strategy:
+ matrix:
+ destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+
+ # The cache key is the same for every matrix destination.
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: macOS-latest-swift
+ evict-old-files: 1d
+
+ # Best effort: a failing "brew update" does not fail the job.
+ - name: Dependencies
+ id: depends
+ continue-on-error: true
+ run: |
+ brew update
+
+ # The install step at the end runs with sudo.
+ - name: Build llama.cpp with CMake
+ id: cmake_build
+ run: |
+ sysctl -a
+ cmake -B build -G Xcode \
+ -DGGML_METAL_USE_BF16=ON \
+ -DGGML_METAL_EMBED_LIBRARY=ON \
+ -DLLAMA_BUILD_EXAMPLES=OFF \
+ -DLLAMA_BUILD_TESTS=OFF \
+ -DLLAMA_BUILD_SERVER=OFF \
+ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
+ sudo cmake --install build --config Release
+
+ - name: xcodebuild for swift package
+ id: xcodebuild
+ run: |
+ xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
+
+ # Build-only job under MSYS2 for the UCRT64 and CLANG64 environments. Each
+ # matrix entry builds once with default options, removes the build directory,
+ # then builds again with OpenBLAS. fail-fast is off, so one entry failing does
+ # not cancel the other.
+ windows-msys2:
+ runs-on: windows-latest
+
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
+
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+
+ # The cache key is the same for both matrix entries.
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: windows-msys2
+ variant: sccache
+ evict-old-files: 1d
+
+ # Installs the toolchain, CMake and OpenBLAS packages for the selected environment.
+ - name: Setup ${{ matrix.sys }}
+ uses: msys2/setup-msys2@v2
+ with:
+ update: true
+ msystem: ${{matrix.sys}}
+ install: >-
+ base-devel
+ git
+ mingw-w64-${{matrix.env}}-toolchain
+ mingw-w64-${{matrix.env}}-cmake
+ mingw-w64-${{matrix.env}}-openblas
+
+ - name: Build using CMake
+ shell: msys2 {0}
+ run: |
+ cmake -B build
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
+ - name: Clean after building using CMake
+ shell: msys2 {0}
+ run: |
+ rm -rf build
+
+ - name: Build using CMake w/ OpenBLAS
+ shell: msys2 {0}
+ run: |
+ cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
+ # Windows CMake builds, one per matrix entry; each entry supplies a build name
+ # and the CMake defines for it. Tests run for a subset of the entries. On
+ # pushes to master, or when a release is requested through workflow_dispatch,
+ # the binaries are zipped and uploaded as an artifact.
+ windows-latest-cmake:
+ runs-on: windows-latest
+
+ # Pinned versions of the external downloads used by the steps below.
+ env:
+ OPENBLAS_VERSION: 0.3.23
+ SDE_VERSION: 9.33.0-2024-01-07
+ VULKAN_VERSION: 1.3.261.1
+
+ strategy:
+ matrix:
+ include:
+ - build: 'noavx-x64'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
+ - build: 'avx2-x64'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
+ - build: 'avx-x64'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
+ - build: 'avx512-x64'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
+ - build: 'openblas-x64'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+ - build: 'kompute-x64'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
+ - build: 'vulkan-x64'
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
+ - build: 'llvm-arm64'
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
+ - build: 'msvc-arm64'
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
+ - build: 'llvm-arm64-opencl-adreno'
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
+
+ steps:
+ # Full history (fetch-depth: 0) is needed by the commit count in the tag step.
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: windows-latest-cmake-${{ matrix.build }}
+ variant: sccache
+ evict-old-files: 1d
+
+ - name: Clone Kompute submodule
+ id: clone_kompute
+ if: ${{ matrix.build == 'kompute-x64' }}
+ run: |
+ git submodule update --init ggml/src/ggml-kompute/kompute
+
+ # Downloads the OpenBLAS release and its license, unpacks it, then uses the
+ # MSVC lib.exe to generate openblas.lib from libopenblas.def.
+ - name: Download OpenBLAS
+ id: get_openblas
+ if: ${{ matrix.build == 'openblas-x64' }}
+ run: |
+ curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
+ curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
+ mkdir $env:RUNNER_TEMP/openblas
+ tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
+ $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
+ & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
+
+ # Installs the pinned Vulkan SDK and exports VULKAN_SDK and its bin
+ # directory to the following steps.
+ - name: Install Vulkan SDK
+ id: get_vulkan
+ if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
+ run: |
+ curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
+ & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
+ Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
+ Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
+
+ - name: Install Ninja
+ id: install_ninja
+ run: |
+ choco install ninja
+
+ # Builds and installs the OpenCL headers, then the ICD loader for arm64,
+ # into the opencl-arm64-release prefix under RUNNER_TEMP. The loader is
+ # cloned inside the OpenCL-Headers checkout because of the earlier cd.
+ - name: Install OpenCL Headers and Libs
+ id: install_opencl
+ if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
+ run: |
+ git clone https://github.com/KhronosGroup/OpenCL-Headers
+ cd OpenCL-Headers
+ cmake -B build `
+ -DBUILD_TESTING=OFF `
+ -DOPENCL_HEADERS_BUILD_TESTING=OFF `
+ -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
+ cmake --build build --target install
+ git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
+ cd OpenCL-ICD-Loader
+ cmake -B build-arm64-release `
+ -A arm64 `
+ -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
+ cmake --build build-arm64-release --target install --config release
+
+ - name: Build
+ id: cmake_build
+ run: |
+ cmake -S . -B build ${{ matrix.defines }}
+ cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
+
+ - name: Add libopenblas.dll
+ id: add_libopenblas_dll
+ if: ${{ matrix.build == 'openblas-x64' }}
+ run: |
+ cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
+ cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
+
+ # Compiles and runs a small CPUID probe. HAS_AVX512F=1 is exported only when
+ # the probe succeeds; otherwise the variable is left unset (never set to 0).
+ - name: Check AVX512F support
+ id: check_avx512f
+ if: ${{ matrix.build == 'avx512-x64' }}
+ continue-on-error: true
+ run: |
+ cd build
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
+ $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
+ echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
+ & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
+ .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
+
+ # Skipped for the arm64, kompute and vulkan entries, and for avx512-x64
+ # unless the probe above reported AVX512F.
+ - name: Test
+ id: cmake_test
+ # not all machines have native AVX-512
+ if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
+ run: |
+ cd build
+ ctest -L main -C Release --verbose --timeout 900
+
+ # Fallback for avx512-x64 when the probe did not report AVX512F. The
+ # condition tests "!= '1'" because HAS_AVX512F is never set to '0'; a
+ # comparison against '0' could never be true and this step would never run.
+ - name: Test (Intel SDE)
+ id: cmake_test_sde
+ if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F != '1' }} # use Intel SDE for AVX-512 emulation
+ run: |
+ curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
+ # for some weird reason windows tar doesn't like sde tar.xz
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
+ $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
+ cd build
+ $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
+ & $sde -future -- ctest -L main -C Release --verbose --timeout 900
+
+ # Output "name" is b<commit count> on master, otherwise
+ # <branch with '/' replaced by '-'>-b<commit count>-<7-char hash>.
+ # BRANCH_NAME is read from the process environment (workflow-level env)
+ # rather than expanded into the script text by a workflow expression, so a
+ # crafted branch name cannot inject shell commands.
+ - name: Determine tag name
+ id: tag
+ shell: bash
+ run: |
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+ if [[ "${BRANCH_NAME}" == "master" ]]; then
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+ else
+ SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+ fi
+
+ # Packaging and upload run only for pushes to master or an explicit release request.
+ - name: Pack artifacts
+ id: pack_artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ run: |
+ Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
+ Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
+
+ - name: Upload artifacts
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+ uses: actions/upload-artifact@v4
+ with:
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
+ name: llama-bin-win-${{ matrix.build }}.zip
+
+ # Build-only job inside the nvidia/cuda:12.6.2-devel-ubuntu24.04 container:
+ # Ninja Release build with GGML_CUDA enabled; no tests are run.
+ ubuntu-latest-cmake-cuda:
+ runs-on: ubuntu-latest
+ container: nvidia/cuda:12.6.2-devel-ubuntu24.04
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # DEBIAN_FRONTEND=noninteractive is set for this step; apt is called without sudo.
+ - name: Install dependencies
+ env:
+ DEBIAN_FRONTEND: noninteractive
+ run: |
+ apt update
+ apt install -y cmake build-essential ninja-build libgomp1 git
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ubuntu-latest-cmake-cuda
+ evict-old-files: 1d
+
+ # Targets a single CUDA architecture (89-real) and passes
+ # --allow-shlib-undefined to the linker for executables.
+ - name: Build with CMake
+ run: |
+ cmake -S . -B build -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_CUDA_ARCHITECTURES=89-real \
+ -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
+ -DLLAMA_FATAL_WARNINGS=ON \
+ -DGGML_NATIVE=OFF \
+ -DGGML_CUDA=ON
+ cmake --build build
+
+ windows-2019-cmake-cuda:
+ runs-on: windows-2019
+
+ strategy:
+ matrix:
+ cuda: ['12.4', '11.7']
+ build: ['cuda']
+
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Install ccache
+ uses: hendrikmuhs/ccache-action@v1.2.16
+ with:
+ key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
+ variant: sccache
+ evict-old-files: 1d
+
+ - name: Install Cuda Toolkit 11.7
+ if: ${{ matrix.cuda == '11.7' }}
+ run: |
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+ choco install unzip -y
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+ echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Install Cuda Toolkit 12.4
+        if: ${{ matrix.cuda == '12.4' }}
+        run: |
+          # Assemble a CUDA 12.4 toolkit from NVIDIA's redistributable archives:
+          # download every component, unpack them side by side into the toolkit
+          # directory, then merge each unpacked archive into the toolkit root.
+          $cudaRoot = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
+          $redistUrl = "https://developer.download.nvidia.com/compute/cuda/redist"
+          # (component, archive version), listed in download and merge order
+          $components = @(
+            @("cuda_cudart", "12.4.127"),
+            @("cuda_nvcc", "12.4.131"),
+            @("cuda_nvrtc", "12.4.127"),
+            @("libcublas", "12.4.5.8"),
+            @("cuda_nvtx", "12.4.127"),
+            @("cuda_profiler_api", "12.4.127"),
+            @("visual_studio_integration", "12.4.127"),
+            @("cuda_nvprof", "12.4.127"),
+            @("cuda_cccl", "12.4.127")
+          )
+
+          mkdir -p $cudaRoot
+          choco install unzip -y
+          foreach ($component in $components) {
+            $archive = "$($component[0])-windows-x86_64-$($component[1])-archive"
+            curl -O "$redistUrl/$($component[0])/windows-x86_64/${archive}.zip"
+          }
+          unzip '*.zip' -d $cudaRoot
+          foreach ($component in $components) {
+            $archive = "$($component[0])-windows-x86_64-$($component[1])-archive"
+            xcopy "$cudaRoot\$archive\*" $cudaRoot /E /I /H /Y
+          }
+
+          # Expose the toolkit to the following steps
+          echo "$cudaRoot\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "$cudaRoot\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "CUDA_PATH=$cudaRoot" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          echo "CUDA_PATH_V12_4=$cudaRoot" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Install Ninja
+        id: install_ninja
+        run: |
+          # -y answers Chocolatey's confirmation prompts so the install can never wait for input
+          choco install ninja -y
+
+      # Configures and builds with the "Ninja Multi-Config" generator from a cmd shell,
+      # after vcvars64.bat (Visual Studio 2019 Enterprise) has been called in that shell.
+      # The ggml target is built first with NUMBER_OF_PROCESSORS-1 jobs; the second
+      # invocation builds the remaining targets without an explicit job count.
+      # NOTE(review): the reason for the reduced job count is not stated here -
+      # presumably it limits resource pressure while compiling the CUDA sources; confirm.
+      - name: Build
+        id: cmake_build
+        shell: cmd
+        run: |
+          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
+          cmake -S . -B build -G "Ninja Multi-Config" ^
+            -DLLAMA_BUILD_SERVER=ON ^
+            -DGGML_NATIVE=OFF ^
+            -DGGML_CUDA=ON ^
+            -DGGML_RPC=ON
+          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
+          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
+          cmake --build build --config Release
+
+      # Publishes the release/archive tag as the step output `name`:
+      #   master        -> b<commit count>
+      #   other branch  -> <branch with '/' replaced by '-'>-b<commit count>-<7-char hash>
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        env:
+          # Hand the branch name to the script as data. Branch names are chosen by
+          # whoever opens a pull request, so expanding the expression directly inside
+          # the script text would let a crafted name inject shell commands.
+          BRANCH_NAME: ${{ env.BRANCH_NAME }}
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${BRANCH_NAME}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      # Zips the Release binaries. Runs only for pushes to master or when the
+      # `create_release` input is 'true'. The archive name carries the computed tag,
+      # the matrix build value and the CUDA version.
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
+
+      # Uploads the archive produced by "Pack artifacts" (same condition). The artifact
+      # name omits the tag, so it is the same for every run of a given CUDA version.
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
+          name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+
+      # Collects the CUDA runtime DLLs (cudart, cublas, cublasLt) from the toolkit's
+      # bin and lib directories and zips them into a separate archive.
+      # The condition must match "Upload Cuda runtime": without the create_release case,
+      # a manually requested release would try to upload an archive that was never built.
+      - name: Copy and pack Cuda runtime
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          echo "Cuda install location: ${{ env.CUDA_PATH }}"
+          $dst='.\build\bin\cudart\'
+          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+          7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
+
+      # Uploads the CUDA runtime archive for pushes to master or manually requested releases.
+      - name: Upload Cuda runtime
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+          name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
+
+  # Windows SYCL build: installs Intel oneAPI via scripts/install-oneapi.bat and builds
+  # with examples/sycl/win-build-sycl.bat.
+  windows-latest-cmake-sycl:
+    runs-on: windows-latest
+
+    # Steps of this job run under bash unless they set their own shell
+    defaults:
+      run:
+        shell: bash
+
+    env:
+      # Installer URL and component list; both are passed to scripts/install-oneapi.bat
+      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
+      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
+      # Root directory the packaging step copies the oneAPI runtime DLLs from
+      ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          # Full history: the tag step counts commits with `git rev-list --count HEAD`
+          fetch-depth: 0
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-latest-cmake-sycl
+          variant: sccache
+          evict-old-files: 1d
+
+      - name: Install
+        run: |
+          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
+
+      - name: Build
+        id: cmake_build
+        run: examples/sycl/win-build-sycl.bat
+
+      # Publishes the release/archive tag as the step output `name`:
+      #   master        -> b<commit count>
+      #   other branch  -> <branch with '/' replaced by '-'>-b<commit count>-<7-char hash>
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        env:
+          # Hand the branch name to the script as data. Branch names are chosen by
+          # whoever opens a pull request, so expanding the expression directly inside
+          # the script text would let a crafted name inject shell commands.
+          BRANCH_NAME: ${{ env.BRANCH_NAME }}
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${BRANCH_NAME}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      # Bundles the oneAPI runtime DLLs with the build output and zips ./build/bin.
+      - name: Build the release package
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
+
+          # Runtime DLLs to ship, in copy order: mkl, compiler, dnnl, tbb
+          runtime_dlls=(
+            "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll"
+            "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll"
+            "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll"
+            "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll"
+            "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll"
+            "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll"
+            "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll"
+            "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll"
+            "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll"
+            "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll"
+            "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll"
+            "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll"
+            "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll"
+          )
+          for dll in "${runtime_dlls[@]}"; do
+            cp "$dll" ./build/bin
+          done
+
+          echo "cp oneAPI running time dll files to ./build/bin done"
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
+
+      # Uploads the SYCL archive built by the previous step (same condition).
+      - name: Upload the release package
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
+          name: llama-bin-win-sycl-x64.zip
+
+  # Windows HIP build check. Skipped when a release was requested manually; the release
+  # binaries come from windows-latest-cmake-hip-release instead.
+  windows-latest-cmake-hip:
+    if: ${{ github.event.inputs.create_release != 'true' }}
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      # Downloads the AMD HIP SDK installer into the runner temp directory and runs it,
+      # waiting for it to finish.
+      - name: Install
+        id: depends
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "Downloading AMD HIP SDK Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP SDK"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP SDK installation"
+
+      # Invokes the installed clang; the wildcard stands in for the ROCm version directory.
+      - name: Verify ROCm
+        id: verify
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ${{ github.job }}
+          evict-old-files: 1d
+
+      - name: Build
+        id: cmake_build
+        run: |
+          # HIP_PATH is the ROCm directory two levels above bin\clang.exe
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . `
+            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
+            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+            -DCMAKE_BUILD_TYPE=Release `
+            -DGGML_HIP=ON `
+            -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+
+  # Release variant of the Windows HIP build: runs for pushes to master or manually
+  # requested releases, once per GPU target in the matrix.
+  windows-latest-cmake-hip-release:
+    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+    runs-on: windows-latest
+
+    strategy:
+      matrix:
+        # Each value is passed to CMake as AMDGPU_TARGETS and appears in the archive name
+        gpu_target: [gfx1100, gfx1101, gfx1030]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          # Full history: the tag step counts commits with `git rev-list --count HEAD`
+          fetch-depth: 0
+
+      # NOTE(review): the cache key is the same for every gpu_target - confirm that
+      # sharing one cache entry across the matrix is intended.
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-latest-cmake-hip-release
+          evict-old-files: 1d
+
+      # Downloads the AMD HIP SDK installer into the runner temp directory and runs it,
+      # waiting for it to finish.
+      - name: Install
+        id: depends
+        run: |
+          $ErrorActionPreference = "Stop"
+          write-host "Downloading AMD HIP SDK Installer"
+          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+          write-host "Installing AMD HIP SDK"
+          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
+          write-host "Completed AMD HIP SDK installation"
+
+      # Invokes the installed clang; the wildcard stands in for the ROCm version directory.
+      - name: Verify ROCm
+        id: verify
+        run: |
+          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
+
+      # Builds for one GPU target, then copies hipblas.dll, rocblas.dll and the rocBLAS
+      # library directory from the SDK into build\bin so they are packed with the binaries.
+      - name: Build
+        id: cmake_build
+        run: |
+          # HIP_PATH is the ROCm directory two levels above bin\clang.exe
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . `
+            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
+            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+            -DCMAKE_BUILD_TYPE=Release `
+            -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
+            -DGGML_HIP=ON `
+            -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
+          md "build\bin\rocblas\library\"
+          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
+          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
+
+      # Publishes the release/archive tag as the step output `name`:
+      #   master        -> b<commit count>
+      #   other branch  -> <branch with '/' replaced by '-'>-b<commit count>-<7-char hash>
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        env:
+          # Hand the branch name to the script as data. Branch names are chosen by
+          # whoever opens a pull request, so expanding the expression directly inside
+          # the script text would let a crafted name inject shell commands.
+          BRANCH_NAME: ${{ env.BRANCH_NAME }}
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${BRANCH_NAME}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      # Zips everything under build\bin; the archive name carries the tag and GPU target.
+      - name: Pack artifacts
+        id: pack_artifacts
+        run: |
+          7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
+
+      # Uploads the archive for this GPU target; the artifact name omits the tag.
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
+          name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
+
+  # iOS build check on macOS: builds and installs the library with the Xcode generator,
+  # then builds the Swift package scheme and the llama.swiftui example project.
+  ios-xcode-build:
+    runs-on: macos-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Build
+        id: cmake_build
+        run: |
+          # Print the runner's system information to the log
+          sysctl -a
+          cmake -B build -G Xcode \
+            -DGGML_METAL_USE_BF16=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
+            -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DLLAMA_BUILD_SERVER=OFF \
+            -DCMAKE_SYSTEM_NAME=iOS \
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          # Arguments after `--` go to the native build tool; code signing is disabled
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
+          sudo cmake --install build --config Release
+
+      - name: xcodebuild for swift package
+        id: xcodebuild
+        run: |
+          xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
+
+      - name: Build Xcode project
+        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
+
+  # Builds the example project in examples/llama.android with its Gradle wrapper.
+  android-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: android-build
+          evict-old-files: 1d
+
+      - name: Set up JDK
+        uses: actions/setup-java@v3
+        with:
+          java-version: 17
+          distribution: zulu
+
+      - name: Setup Android SDK
+        uses: android-actions/setup-android@v3
+        with:
+          log-accepted-android-sdk-licenses: false
+
+      - name: Build
+        run: |
+          cd examples/llama.android
+
+          ./gradlew build --no-daemon
+
+  # Creates the GitHub release from the archives uploaded by the build jobs.
+  # Runs for pushes to master or when the `create_release` input is 'true'.
+  release:
+    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+
+    runs-on: ubuntu-latest
+
+    # Every job that uploads a release archive has to be listed here; otherwise
+    # "Download artifacts" can run before that job has uploaded its archive and the
+    # release is published without it. windows-latest-cmake-sycl uploads
+    # llama-bin-win-sycl-x64.zip under the same condition as this job.
+    needs:
+      - ubuntu-cpu-cmake
+      - windows-latest-cmake
+      - windows-2019-cmake-cuda
+      - windows-latest-cmake-sycl
+      - windows-latest-cmake-hip-release
+      - macOS-latest-cmake-arm64
+      - macOS-latest-cmake-x64
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          # Full history: the tag step counts commits with `git rev-list --count HEAD`
+          fetch-depth: 0
+
+      # NOTE(review): no step of this job visibly compiles anything - confirm whether
+      # this cache step is still needed.
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: release
+          evict-old-files: 1d
+
+      # Publishes the release tag as the step output `name`:
+      #   master        -> b<commit count>
+      #   other branch  -> <branch with '/' replaced by '-'>-b<commit count>-<7-char hash>
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        env:
+          # Hand the branch name to the script as data instead of expanding the
+          # expression inside the script text, so a crafted branch name cannot inject
+          # shell commands into a job that holds the release token.
+          BRANCH_NAME: ${{ env.BRANCH_NAME }}
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          if [[ "${BRANCH_NAME}" == "master" ]]; then
+            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+          else
+            SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
+            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+          fi
+
+      # No artifact name is given, so the artifacts are fetched into ./artifact.
+      # NOTE(review): the move step below expects one sub-directory per artifact
+      # (./artifact/<artifact>/<file>.zip) - confirm against download-artifact@v4.
+      - name: Download artifacts
+        id: download-artifact
+        uses: actions/download-artifact@v4
+        with:
+          path: ./artifact
+
+      # Flattens the zip files of all artifact sub-directories into ./artifact/release
+      - name: Move artifacts
+        id: move_artifacts
+        run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
+
+      # Creates the release for the computed tag; its `id` output is used by the
+      # upload step to attach the archives.
+      - name: Create release
+        id: create_release
+        uses: ggml-org/action-create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ steps.tag.outputs.name }}
+
+      # Attaches every .zip found in ./artifact/release to the release created above.
+      - name: Upload release
+        id: upload_release
+        uses: actions/github-script@v3
+        with:
+          github-token: ${{secrets.GITHUB_TOKEN}}
+          script: |
+            const fs = require('fs');
+            const path = require('path');
+            const { owner, repo } = context.repo;
+            const release_id = '${{ steps.create_release.outputs.id }}';
+            // Only zip archives are published; other directory entries are ignored.
+            const archives = fs.readdirSync('./artifact/release').filter((entry) => path.extname(entry) === '.zip');
+            // Assets are uploaded one after another, in directory-listing order.
+            for (const name of archives) {
+              console.log('uploadReleaseAsset', name);
+              const data = fs.readFileSync(`./artifact/release/${name}`);
+              await github.repos.uploadReleaseAsset({ owner, repo, release_id, name, data });
+            }
+
+# ubuntu-latest-gcc:
+# runs-on: ubuntu-latest
+#
+# strategy:
+# matrix:
+# build: [Debug, Release]
+#
+# steps:
+# - name: Clone
+# uses: actions/checkout@v4
+#
+# - name: Dependencies
+# run: |
+# sudo apt-get update
+# sudo apt-get install build-essential
+# sudo apt-get install cmake
+#
+# - name: Configure
+# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#
+# - name: Build
+# run: |
+# make
+#
+# ubuntu-latest-clang:
+# runs-on: ubuntu-latest
+#
+# strategy:
+# matrix:
+# build: [Debug, Release]
+#
+# steps:
+# - name: Clone
+# uses: actions/checkout@v4
+#
+# - name: Dependencies
+# run: |
+# sudo apt-get update
+# sudo apt-get install build-essential
+# sudo apt-get install cmake
+#
+# - name: Configure
+# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
+#
+# - name: Build
+# run: |
+# make
+#
+# ubuntu-latest-gcc-sanitized:
+# runs-on: ubuntu-latest
+#
+# strategy:
+# matrix:
+# sanitizer: [ADDRESS, THREAD, UNDEFINED]
+#
+# steps:
+# - name: Clone
+# uses: actions/checkout@v4
+#
+# - name: Dependencies
+# run: |
+# sudo apt-get update
+# sudo apt-get install build-essential
+# sudo apt-get install cmake
+#
+# - name: Configure
+# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
+#
+# - name: Build
+# run: |
+# make
+#
+# windows:
+# runs-on: windows-latest
+#
+# strategy:
+# matrix:
+# build: [Release]
+# arch: [Win32, x64]
+# include:
+# - arch: Win32
+# s2arc: x86
+# - arch: x64
+# s2arc: x64
+#
+# steps:
+# - name: Clone
+# uses: actions/checkout@v4
+#
+# - name: Add msbuild to PATH
+# uses: microsoft/setup-msbuild@v1
+#
+# - name: Configure
+# run: >
+# cmake -S . -B ./build -A ${{ matrix.arch }}
+# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+#
+# - name: Build
+# run: |
+# cd ./build
+# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+#
+# - name: Upload binaries
+# uses: actions/upload-artifact@v4
+# with:
+# name: llama-bin-${{ matrix.arch }}
+# path: build/bin/${{ matrix.build }}
+#
+# windows-blas:
+# runs-on: windows-latest
+#
+# strategy:
+# matrix:
+# build: [Release]
+# arch: [Win32, x64]
+# blas: [ON]
+# include:
+# - arch: Win32
+# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
+# s2arc: x86
+# - arch: x64
+# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
+# s2arc: x64
+#
+# steps:
+# - name: Clone
+# uses: actions/checkout@v4
+#
+# - name: Add msbuild to PATH
+# uses: microsoft/setup-msbuild@v1
+#
+# - name: Fetch OpenBLAS
+# if: matrix.blas == 'ON'
+# run: |
+# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
+# 7z x blas.zip -oblas -y
+# copy blas/include/cblas.h .
+# copy blas/include/openblas_config.h .
+# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
+#
+# - name: Configure
+# run: >
+# cmake -S . -B ./build -A ${{ matrix.arch }}
+# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
+# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
+#
+# - name: Build
+# run: |
+# cd ./build
+# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
+#
+# - name: Copy libopenblas.dll
+# if: matrix.blas == 'ON'
+# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
+#
+# - name: Upload binaries
+# if: matrix.blas == 'ON'
+# uses: actions/upload-artifact@v4
+# with:
+# name: llama-blas-bin-${{ matrix.arch }}
+# path: build/bin/${{ matrix.build }}
+#
+# emscripten:
+# runs-on: ubuntu-latest
+#
+# strategy:
+# matrix:
+# build: [Release]
+#
+# steps:
+# - name: Clone
+# uses: actions/checkout@v4
+#
+# - name: Dependencies
+# run: |
+# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
+# tar -xvf master.tar.gz
+# emsdk-master/emsdk update
+# emsdk-master/emsdk install latest
+# emsdk-master/emsdk activate latest
+#
+# - name: Configure
+# run: echo "tmp"
+#
+# - name: Build
+# run: |
+# pushd emsdk-master
+# source ./emsdk_env.sh
+# popd
+# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
+# make
+
+  # CANN build inside an ascendai/cann container. Runs for every non-pull-request
+  # event, and for pull requests only when they carry the 'Ascend NPU' label.
+  openEuler-latest-cmake-cann:
+    if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
+    defaults:
+      run:
+        # -l starts a login shell, -e aborts on the first failing command
+        shell: bash -el {0}
+    runs-on: ubuntu-24.04-arm
+    strategy:
+      matrix:
+        # Container image tag
+        cann:
+          - '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
+        # Passed to CMake as SOC_TYPE
+        device:
+          - 'ascend910b3'
+        build:
+          - 'Release'
+    container: ascendai/cann:${{ matrix.cann }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Dependencies
+        run: |
+          yum update -y
+          yum install -y git gcc gcc-c++ make cmake
+
+      - name: Build
+        run: |
+          # Put the toolkit's lib64 and architecture-specific devlib directories on the library path
+          export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
+
+          cmake -S . -B build \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
+            -DGGML_CANN=on \
+            -DSOC_TYPE=${{ matrix.device }}
+          cmake --build build -j $(nproc)
diff --git a/llama.cpp/.github/workflows/close-issue.yml b/llama.cpp/.github/workflows/close-issue.yml
new file mode 100644
index 0000000000000000000000000000000000000000..55f4e807151d5166b6bb2587b5bf9af57812fee7
--- /dev/null
+++ b/llama.cpp/.github/workflows/close-issue.yml
@@ -0,0 +1,28 @@
+name: Close inactive issues
+on:
+  schedule:
+    # Every day at 00:42 (GitHub evaluates cron schedules in UTC)
+    - cron: "42 0 * * *"
+
+# Fine-grained permissions
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+ issues: write
+
+jobs:
+  # Marks issues stale after 30 days and closes them 14 days later, unless they carry
+  # one of the exempt labels. Pull requests are excluded via the -1 values below.
+  close-issues:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@v5
+        with:
+          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
+          days-before-issue-stale: 30
+          days-before-issue-close: 14
+          stale-issue-label: "stale"
+          close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
+          # -1 disables stale/close handling for pull requests
+          days-before-pr-stale: -1
+          days-before-pr-close: -1
+          operations-per-run: 10000
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/llama.cpp/.github/workflows/docker.yml b/llama.cpp/.github/workflows/docker.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3c72d9c87d0a5f3c21b2e67419cc883dffb5297f
--- /dev/null
+++ b/llama.cpp/.github/workflows/docker.yml
@@ -0,0 +1,173 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+# GitHub recommends pinning actions to a commit SHA.
+# To get a newer version, you will need to update the SHA.
+# You can also reference a tag or branch, but the action may change without warning.
+
+name: Publish Docker image
+
+on:
+  workflow_dispatch: # allows manual triggering
+  schedule:
+    # Rebuild daily rather than on every push because it is expensive
+    - cron: '12 4 * * *'
+
+# Group by workflow plus ref when head_ref is set, otherwise by run id (so such runs
+# never share a group); a newer run in the same group cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+# Fine-grained permissions
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+ packages: write
+
+jobs:
+  # Builds the full/light/server images for every matrix entry and pushes them.
+  # NOTE(review): the job name mentions Docker Hub, but the login step and the
+  # generated tags target ghcr.io.
+  push_to_registry:
+    name: Push Docker image to Docker Hub
+
+    runs-on: ubuntu-22.04
+    env:
+      COMMIT_SHA: ${{ github.sha }}
+    strategy:
+      # One failing image must not cancel the other matrix entries
+      fail-fast: false
+      matrix:
+        # Per entry: tag suffix, Dockerfile, target platforms, which of the
+        # full/light/server targets to build, and whether to free disk space first
+        config:
+          # Multi-stage build
+          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
+          # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
+          #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # preserve git history, so we can determine the build number
+
+      # NOTE(review): QEMU is presumably what allows the linux/arm64 platform of the
+      # cpu image to be built on this runner - confirm.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Authenticates against ghcr.io with the workflow token; the images are pushed
+      # there, not to Docker Hub, so the step name says so.
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Computes the image tag lists and publishes them as the step outputs
+      # full_output_tags, light_output_tags and server_output_tags. Each list holds a
+      # moving tag (e.g. `full-cuda`) and a versioned tag with a postfix:
+      #   master        -> -b<commit count>
+      #   other branch  -> -<branch with '/' replaced by '-'>-<7-char hash>
+      # The "cpu" matrix entry gets no type suffix.
+      - name: Determine tag name
+        id: tag
+        shell: bash
+        run: |
+          BUILD_NUMBER="$(git rev-list --count HEAD)"
+          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
+          REPO_NAME="${{ github.event.repository.name }}"
+
+          # determine tag name postfix (build number, commit hash)
+          # The branch name is read from the step's environment (see `env:` below)
+          # rather than expanded into the script text, so its content is never
+          # interpreted as shell code.
+          if [[ "${GITHUB_BRANCH_NAME}" == "master" ]]; then
+            TAG_POSTFIX="-b${BUILD_NUMBER}"
+          else
+            SAFE_NAME=$(echo "${GITHUB_BRANCH_NAME}" | tr '/' '-')
+            TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
+          fi
+          # list all tags possible
+          if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
+            TYPE=""
+          else
+            TYPE="-${{ matrix.config.tag }}"
+          fi
+          PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
+          FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
+          LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
+          SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
+          echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
+          echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
+          echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
+          echo "full_output_tags=$FULLTAGS"  # print out for debugging
+          echo "light_output_tags=$LIGHTTAGS"  # print out for debugging
+          echo "server_output_tags=$SERVERTAGS"  # print out for debugging
+        env:
+          GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
+
+      # Frees runner disk space before the image builds, for matrix entries that ask
+      # for it. The matrix key is spelled `freediskspace`; a differently spelled key
+      # evaluates to an empty value and the step would never run.
+      - name: Free Disk Space (Ubuntu)
+        if: ${{ matrix.config.freediskspace == true }}
+        uses: ggml-org/free-disk-space@v1.3.1
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: false
+
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: true
+          swap-storage: true
+
+      # Builds the `full` target of the matrix entry's Dockerfile and pushes it under
+      # the tags computed by the tag step. The 'push' event in the condition cannot
+      # occur with this workflow's triggers (workflow_dispatch and schedule only).
+      - name: Build and push Full Docker image (tagged + versioned)
+        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.config.platforms }}
+          # tag list is generated from step above
+          tags: ${{ steps.tag.outputs.full_output_tags }}
+          file: ${{ matrix.config.dockerfile }}
+          target: full
+          provenance: false
+          # using github experimental cache
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          # return to this if the experimental github cache is having issues
+          #cache-to: type=local,dest=/tmp/.buildx-cache
+          #cache-from: type=local,src=/tmp/.buildx-cache
+
+      # Builds the `light` target of the matrix entry's Dockerfile and pushes it under
+      # the tags computed by the tag step. The 'push' event in the condition cannot
+      # occur with this workflow's triggers (workflow_dispatch and schedule only).
+      - name: Build and push Light Docker image (tagged + versioned)
+        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.config.platforms }}
+          # tag list is generated from step above
+          tags: ${{ steps.tag.outputs.light_output_tags }}
+          file: ${{ matrix.config.dockerfile }}
+          target: light
+          provenance: false
+          # using github experimental cache
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          # return to this if the experimental github cache is having issues
+          #cache-to: type=local,dest=/tmp/.buildx-cache
+          #cache-from: type=local,src=/tmp/.buildx-cache
+
+      # Builds the `server` target of the matrix entry's Dockerfile and pushes it under
+      # the tags computed by the tag step. The 'push' event in the condition cannot
+      # occur with this workflow's triggers (workflow_dispatch and schedule only).
+      - name: Build and push Server Docker image (tagged + versioned)
+        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.config.platforms }}
+          # tag list is generated from step above
+          tags: ${{ steps.tag.outputs.server_output_tags }}
+          file: ${{ matrix.config.dockerfile }}
+          target: server
+          provenance: false
+          # using github experimental cache
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          # return to this if the experimental github cache is having issues
+          #cache-to: type=local,dest=/tmp/.buildx-cache
+          #cache-from: type=local,src=/tmp/.buildx-cache
diff --git a/llama.cpp/.github/workflows/editorconfig.yml b/llama.cpp/.github/workflows/editorconfig.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e2f74091c81b4105da3301d8004c96c357cc02b4
--- /dev/null
+++ b/llama.cpp/.github/workflows/editorconfig.yml
@@ -0,0 +1,29 @@
+# Runs editorconfig-checker on pushes and pull requests targeting master, and on demand.
+name: EditorConfig Checker
+
+on:
+  workflow_dispatch: # allows manual triggering
+    # NOTE(review): no job in this workflow reads `create_release` - confirm
+    # whether this input is still needed.
+    inputs:
+      create_release:
+        description: 'Create new release'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+# Group by workflow plus ref when head_ref is set, otherwise by run id (so such runs
+# never share a group); a newer run in the same group cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  editorconfig:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # Sets up the checker at the pinned version; the next step runs it
+      - uses: editorconfig-checker/action-editorconfig-checker@v2
+        with:
+          version: v3.0.3
+      - run: editorconfig-checker
diff --git a/llama.cpp/.github/workflows/gguf-publish.yml b/llama.cpp/.github/workflows/gguf-publish.yml
new file mode 100644
index 0000000000000000000000000000000000000000..73854a5b0a1e746b50801bf07c313cd269d0fcfc
--- /dev/null
+++ b/llama.cpp/.github/workflows/gguf-publish.yml
@@ -0,0 +1,44 @@
+# This workflow builds the gguf Python package with Poetry and publishes it to PyPI when a 'gguf-v*' tag is pushed or the workflow is run manually
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+# See `gguf-py/README.md` for how to make a release.
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package  # builds gguf-py and publishes the resulting distribution
+
+on:
+  workflow_dispatch:  # manual trigger, no inputs
+  push:
+    # Pattern matched against refs/tags
+    tags:
+      - 'gguf-v*'  # runs for every pushed tag that starts with "gguf-v"
+
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.9.x'  # quoted so the version stays a string
+      - name: Install dependencies  # installs Poetry, then the package's dependencies
+        run: |
+          cd gguf-py
+          python -m pip install poetry
+          poetry install
+
+      - name: Build package  # `poetry build` output is picked up from gguf-py/dist below
+        run: cd gguf-py && poetry build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}  # API token taken from repository secrets
+          packages-dir: gguf-py/dist  # directory holding the built distributions
diff --git a/llama.cpp/.github/workflows/labeler.yml b/llama.cpp/.github/workflows/labeler.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d888d7125be305dde65715b3e32c8b05ae287c37
--- /dev/null
+++ b/llama.cpp/.github/workflows/labeler.yml
@@ -0,0 +1,17 @@
+name: "Pull Request Labeler"  # applies labels to pull requests according to .github/labeler.yml
+on:
+  - pull_request_target  # runs in the context of the base repository rather than the PR head
+
+jobs:
+  labeler:
+    permissions:
+      contents: read  # read-only access to repository contents
+      pull-requests: write  # required so the job can set labels on the pull request
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          repository: "ggerganov/llama.cpp"  # always checks out this repository, not the triggering fork
+      - uses: actions/labeler@v5
+        with:
+          configuration-path: '.github/labeler.yml'  # label rules are read from the checkout made above
diff --git a/llama.cpp/.github/workflows/python-check-requirements.yml b/llama.cpp/.github/workflows/python-check-requirements.yml
new file mode 100644
index 0000000000000000000000000000000000000000..798f6578ccf62afecd8a228fa6eeb126c3d7f98c
--- /dev/null
+++ b/llama.cpp/.github/workflows/python-check-requirements.yml
@@ -0,0 +1,33 @@
+name: Python check requirements.txt  # runs scripts/check-requirements.sh when requirements or converters change
+
+on:
+  push:
+    paths:  # only pushes touching one of these paths trigger the workflow
+      - '.github/workflows/python-check-requirements.yml'
+      - 'scripts/check-requirements.sh'
+      - 'convert*.py'
+      - '**/requirements*.txt'
+  pull_request:
+    paths:  # same path filter as for push
+      - '.github/workflows/python-check-requirements.yml'
+      - 'scripts/check-requirements.sh'
+      - 'convert*.py'
+      - '**/requirements*.txt'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}  # PR runs share one group per ref; other runs get a unique run_id group
+  cancel-in-progress: true  # a newer run in the same group cancels the older one
+
+jobs:
+  python-check-requirements:
+    runs-on: ubuntu-latest
+    name: check-requirements  # display name of the job
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so the version stays a string
+      - name: Run check-requirements.sh script
+        run: bash scripts/check-requirements.sh
diff --git a/llama.cpp/.github/workflows/python-lint.yml b/llama.cpp/.github/workflows/python-lint.yml
new file mode 100644
index 0000000000000000000000000000000000000000..57d56fa245a29c8fcb99ededa4320ce386ab6728
--- /dev/null
+++ b/llama.cpp/.github/workflows/python-lint.yml
@@ -0,0 +1,30 @@
+name: flake8 Lint  # lints the repository's Python files with flake8
+
+on:
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']  # only pushes that touch Python files or this workflow
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']  # same path filter as for push
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}  # PR runs share one group per ref; other runs get a unique run_id group
+  cancel-in-progress: true  # a newer run in the same group cancels the older one
+
+jobs:
+  flake8-lint:
+    runs-on: ubuntu-latest
+    name: Lint  # display name of the job
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so the version stays a string
+      - name: flake8 Lint
+        uses: py-actions/flake8@v2
+        with:
+          plugins: "flake8-no-print"  # extra flake8 plugin passed to the action
diff --git a/llama.cpp/.github/workflows/python-type-check.yml b/llama.cpp/.github/workflows/python-type-check.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a05abf3792f2bf250d8653e4ea5ff81984e3cbea
--- /dev/null
+++ b/llama.cpp/.github/workflows/python-type-check.yml
@@ -0,0 +1,40 @@
+name: Python Type-Check  # type-checks the repository's Python code with Pyright
+
+on:
+  push:
+    paths:  # only pushes touching one of these paths trigger the workflow
+      - '.github/workflows/python-type-check.yml'
+      - 'pyrightconfig.json'
+      - '**.py'
+      - '**/requirements*.txt'
+  pull_request:
+    paths:  # same path filter as for push
+      - '.github/workflows/python-type-check.yml'
+      - 'pyrightconfig.json'
+      - '**.py'
+      - '**/requirements*.txt'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}  # PR runs share one group per ref; other runs get a unique run_id group
+  cancel-in-progress: true  # a newer run in the same group cancels the older one
+
+jobs:
+  python-type-check:
+    runs-on: ubuntu-latest
+    name: pyright type-check  # display name of the job
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so the version stays a string
+      - name: Install Python dependencies  # installed into the runner's Python so Pyright can resolve imports
+        # TODO: use a venv
+        run: pip install -r requirements/requirements-all.txt
+      - name: Type-check with Pyright
+        uses: jakebailey/pyright-action@v2
+        with:
+          version: 1.1.382  # pinned Pyright release
+          level: warning  # presumably the minimum severity reported - confirm in the pyright-action docs
+          warnings: true  # presumably makes warnings fail the job - confirm in the pyright-action docs
diff --git a/llama.cpp/.github/workflows/server.yml b/llama.cpp/.github/workflows/server.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d530f1b6093a5835ebe11c65884c5f3fb64616ec
--- /dev/null
+++ b/llama.cpp/.github/workflows/server.yml
@@ -0,0 +1,216 @@
+# Server build and tests
+name: Server
+
+on:  # NOTE(review): no `schedule` trigger is declared, although the slow-test steps test github.event.schedule
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      sha:  # optional commit; takes precedence in the `ref` expression of the Clone steps
+        description: 'Commit SHA1 to build'
+        required: false
+        type: string
+      slow_tests:  # read by the "Slow tests" steps of the jobs below
+        description: 'Run slow tests'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+
+env:  # workflow-level variables, exported to every step of every job
+  LLAMA_LOG_COLORS: 1  # the LLAMA_LOG_* values are presumably read by llama.cpp's logging - confirm
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}  # PR runs share a group per ref + head branch; other runs get a unique run_id group
+  cancel-in-progress: true  # a newer run in the same group cancels the older one
+
+jobs:
+  server:  # Linux build of llama-server plus its integration tests, once per matrix entry
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
+        build_type: [RelWithDebInfo]
+        include:  # adds one extra combination: a Release build without any sanitizer
+          - build_type: Release
+            sanitizer: ""
+      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
+
+    steps:
+      - name: Dependencies  # system packages needed for the build and the tests
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install \
+            build-essential \
+            xxd \
+            git \
+            cmake \
+            curl \
+            wget \
+            language-pack-en \
+            libcurl4-openssl-dev
+
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # fetch the full history instead of a shallow clone
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'  # quoted so the version stays a string
+
+      - name: Tests dependencies  # Python packages required by the server test-suite
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      # Setup nodejs (to be used for verifying bundled index.html)
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22.11.0'
+
+      - name: Verify bundled index.html  # rebuilds the web UI and fails if that leaves the work tree modified
+        id: verify_server_index_html
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+          npm ci
+          npm run build
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Repository is dirty or server/webui is not built as expected"
+            echo "Hint: You may need to follow Web UI build guide in server/README.md"
+            echo "${modified_files}"
+            exit 1
+          fi
+
+      - name: Build (no OpenMP)  # THREAD sanitizer variant; never selected while THREAD is absent from the matrix
+        id: cmake_build_no_openmp
+        if: ${{ matrix.sanitizer == 'THREAD' }}
+        run: |
+          cmake -B build \
+              -DGGML_NATIVE=OFF \
+              -DLLAMA_BUILD_SERVER=ON \
+              -DLLAMA_CURL=ON \
+              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+              -DGGML_OPENMP=OFF ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Build (sanitizers)  # ADDRESS / UNDEFINED matrix entries
+        id: cmake_build_sanitizers
+        if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
+        run: |
+          cmake -B build \
+              -DGGML_NATIVE=OFF \
+              -DLLAMA_BUILD_SERVER=ON \
+              -DLLAMA_CURL=ON \
+              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Build  # plain build for the matrix entry that has no sanitizer
+        id: cmake_build
+        if: ${{ matrix.sanitizer == '' }}
+        run: |
+          cmake -B build \
+              -DGGML_NATIVE=OFF \
+              -DLLAMA_BUILD_SERVER=ON \
+              -DLLAMA_CURL=ON \
+              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Tests  # integration tests against the non-sanitizer build
+        id: server_integration_tests
+        if: ${{ matrix.sanitizer == '' }}
+        run: |
+          cd examples/server/tests
+          ./tests.sh
+
+      - name: Tests (sanitizers)  # same test script, with LLAMA_SANITIZE=1 set for sanitizer builds
+        id: server_integration_tests_sanitizers
+        if: ${{ matrix.sanitizer != '' }}
+        run: |
+          cd examples/server/tests
+          LLAMA_SANITIZE=1 ./tests.sh
+
+      - name: Slow tests  # Release entry only, on a schedule event or when the slow_tests input is true
+        id: server_integration_tests_slow
+        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
+        run: |
+          cd examples/server/tests
+          SLOW_TESTS=1 ./tests.sh
+
+
+  server-windows:  # Windows Release build of llama-server plus its pytest suite; this job defines no matrix
+    runs-on: windows-2019  # NOTE(review): confirm this hosted runner image is still offered by GitHub
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # fetch the full history instead of a shallow clone
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: libCURL  # downloads the prebuilt curl archive and unpacks it under RUNNER_TEMP/libcurl
+        id: get_libcurl
+        env:
+          CURL_VERSION: 8.6.0_6
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
+          mkdir $env:RUNNER_TEMP/libcurl
+          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
+
+      - name: Build  # configures against the unpacked libcurl and builds the Release llama-server target
+        id: cmake_build
+        run: |
+          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'  # quoted so the version stays a string
+
+      - name: Tests dependencies  # Python packages required by the server test-suite
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Copy Libcurl  # places the curl DLL next to the built binaries
+        id: prepare_libcurl
+        run: |
+          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
+
+      - name: Tests  # runs every test not marked "slow"
+        id: server_integration_tests
+        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}  # no matrix in this job, so the first operand is always true
+        run: |
+          cd examples/server/tests
+          $env:PYTHONIOENCODING = ":replace"
+          pytest -v -x -m "not slow"
+
+      - name: Slow tests  # full suite with SLOW_TESTS=1, on a schedule event or when the slow_tests input is true
+        id: server_integration_tests_slow
+        if: ${{ github.event.schedule || github.event.inputs.slow_tests == 'true' }}
+        run: |
+          cd examples/server/tests
+          $env:SLOW_TESTS = "1"
+          pytest -v -x
diff --git a/llama.cpp/.gitignore b/llama.cpp/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..987a27e937ff154670bbd1bd54150d314ccf81c9
--- /dev/null
+++ b/llama.cpp/.gitignore
@@ -0,0 +1,145 @@
+# Extensions
+
+*.a
+*.bat
+*.bin
+*.d
+*.dll
+*.dot
+*.etag
+*.exe
+*.gcda
+*.gcno
+*.gcov
+*.gguf
+*.gguf.json
+*.lastModified
+*.log
+*.metallib
+*.o
+*.so
+*.swp
+*.tmp
+
+# IDE / OS
+
+.cache/
+.ccls-cache/
+.direnv/
+.DS_Store
+.envrc
+.idea/
+.swiftpm
+.vs/
+.vscode/
+nppBackup
+
+
+# Coverage
+
+gcovr-report/
+lcov-report/
+
+# Build Artifacts
+
+tags
+.build/
+build*
+!build-info.cmake
+!build-info.cpp.in
+!build-info.sh
+!build.zig
+!docs/build.md
+/libllama.so
+/llama-*
+/vulkan-shaders-gen
+android-ndk-*
+arm_neon.h
+cmake-build-*
+CMakeSettings.json
+compile_commands.json
+ggml-metal-embed.metal
+llama-batched-swift
+/rpc-server
+out/
+tmp/
+autogen-*.md
+
+# Deprecated
+
+/main
+/server
+
+# CI
+
+!.github/workflows/*.yml
+
+# Models
+
+models/*
+models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
+
+# Zig
+zig-out/
+zig-cache/
+
+# Logs
+
+ppl-*.txt
+qnt-*.txt
+perf-*.txt
+
+# Examples
+
+examples/jeopardy/results.txt
+examples/server/*.css.hpp
+examples/server/*.html.hpp
+examples/server/*.js.hpp
+examples/server/*.mjs.hpp
+!build_64.sh
+!examples/*.bat
+!examples/*/*.kts
+!examples/*/*/*.kts
+!examples/sycl/*.bat
+!examples/sycl/*.sh
+
+# Server Web UI temporary files
+node_modules
+examples/server/webui/dist
+
+# Python
+
+/.venv
+__pycache__/
+*/poetry.lock
+poetry.toml
+
+# Nix
+/result
+
+# Test binaries
+/tests/test-backend-ops
+/tests/test-double-float
+/tests/test-grad0
+/tests/test-grammar-parser
+/tests/test-llama-grammar
+/tests/test-opt
+/tests/test-quantize-fns
+/tests/test-quantize-perf
+/tests/test-rope
+/tests/test-sampling
+/tests/test-tokenizer-0
+/tests/test-tokenizer-1-bpe
+/tests/test-tokenizer-1-spm
+
+# Scripts
+!/scripts/install-oneapi.bat
+
+# Test models for lora adapters
+/lora-tests
+
+# Local scripts
+/run-vim.sh
+/run-chat.sh
diff --git a/llama.cpp/.gitmodules b/llama.cpp/.gitmodules
new file mode 100644
index 0000000000000000000000000000000000000000..c64ba56a4e4d5c082d803de71eb1e8f83a5b8a74
--- /dev/null
+++ b/llama.cpp/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "kompute"]
+ path = ggml/src/ggml-kompute/kompute
+ url = https://github.com/nomic-ai/kompute.git
diff --git a/llama.cpp/.pre-commit-config.yaml b/llama.cpp/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0dff5f584658563caf2103df46685f2c401c3967
--- /dev/null
+++ b/llama.cpp/.pre-commit-config.yaml
@@ -0,0 +1,16 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+exclude: 'prompts/.*\.txt'  # regex applied to every hook: skip .txt files under prompts/ (the dot before "txt" is escaped)
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.0.0
+    hooks:
+      - id: flake8
+        additional_dependencies: [flake8-no-print]  # extra plugin installed into the flake8 hook environment
diff --git a/llama.cpp/AUTHORS b/llama.cpp/AUTHORS
new file mode 100644
index 0000000000000000000000000000000000000000..f4b9a8c7b399ae1c7c2459770782c33b2e1eb3b9
--- /dev/null
+++ b/llama.cpp/AUTHORS
@@ -0,0 +1,1047 @@
+# date: Tue Feb 4 13:04:05 EET 2025
+# this file is auto-generated by scripts/gen-authors.sh
+
+0cc4m
+0xspringtime <110655352+0xspringtime@users.noreply.github.com>
+20kdc
+2f38b454
+3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
+44670 <44670@users.noreply.github.com>
+65a <10104049+65a@users.noreply.github.com>
+AN Long
+AT
+Aarni Koskela
+Aaron Miller
+Aaryaman Vasishta
+Abheek Gulati
+Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
+Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com>
+Adithya Balaji
+AdithyanI
+Adrian
+Adrian Hesketh
+Adrien Gallouët
+Adrien Gallouët
+Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
+Ahmet Zeer
+AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
+AidanBeltonS
+Aisuko
+Akarshan Biswas
+Akarshan Biswas
+Al Mochkin <14274697+amochkin@users.noreply.github.com>
+Albert Jin
+Alberto <57916483+albbus-stack@users.noreply.github.com>
+Alberto Cabrera Pérez
+Alberto Cabrera Pérez
+Alex
+Alex Azarov
+Alex Azarov
+Alex Klinkhamer
+Alex Klinkhamer
+Alex Nguyen
+Alex O'Connell <35843486+acon96@users.noreply.github.com>
+Alex Petenchea
+Alex Renda
+Alex Tuddenham <61622354+AlexsCode@users.noreply.github.com>
+Alex von Gluck IV
+Alexey Parfenov
+Ali Chraghi <63465728+alichraghi@users.noreply.github.com>
+Ali Nehzat
+Ali Tariq
+Alon
+AlpinDale <52078762+AlpinDale@users.noreply.github.com>
+Amir
+AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
+Ananta Bastola
+Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>
+András Salamon
+Andreas (Andi) Kunar
+Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
+Andrei
+Andrew Canis
+Andrew Downing
+Andrew Duffy
+Andrew Godfrey
+Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
+Andy Salerno
+Andy Tai
+Anthony Van de Gejuchte
+Antonis Makropoulos
+Arik Poznanski
+Armen Kaleshian
+Artem
+Artem Zinnatullin
+Artyom Lebedev
+Asbjørn Olling
+Ásgeir Bjarni Ingvarsson
+Asghar Ghorbani
+Ashish <1856117+ashishdatta@users.noreply.github.com>
+Ashok Gelal <401055+ashokgelal@users.noreply.github.com>
+Ashraful Islam
+Atsushi Tatsuma
+Austin <77757836+teleprint-me@users.noreply.github.com>
+AustinMroz
+BADR
+Bach Le
+Bailey Chittle <39804642+bachittle@users.noreply.github.com>
+BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com>
+Bartowski
+Behnam M <58621210+ibehnam@users.noreply.github.com>
+Ben Ashbaugh
+Ben Garney
+Ben Siraphob
+Ben Williams
+Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com>
+Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com>
+Benson Wong
+Bernat Vadell
+Bernhard M. Wiedemann
+Bert Wagner
+Billel Mokeddem
+Bingan <70050083+binganao@users.noreply.github.com>
+Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com>
+Bodo Graumann
+Bono Lv
+Borislav Stanimirov
+Borislav Stanimirov
+Branden Butler
+Brandon Squizzato <35474886+bsquizz@users.noreply.github.com>
+Brian
+Brian Cunnie
+Bruce MacDonald
+Bryan Honof
+CJ Pais
+CRD716
+Calvin Laurenson
+Cameron
+Cameron Kaiser
+Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
+CarryFun <76023481+CarryFun@users.noreply.github.com>
+Carsten Kragelund Jørgensen
+CarterLi999 <664681047@qq.com>
+Casey Primozic
+Casey Primozic
+CausalLM <148736309+CausalLM@users.noreply.github.com>
+Cebtenzzre
+CentricStorm
+Chad Brewbaker
+Changyeon Kim
+Chao Jiang
+Charles Xu <63788048+chaxu01@users.noreply.github.com>
+Charles Xu
+Chen Xi
+Chen Xi
+Cheng Shao
+Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
+Chris Elrod
+Chris Kuehl
+Christian Demsar
+Christian Demsar
+Christian Falch <875252+chrfalch@users.noreply.github.com>
+Christian Kastner
+Christian Kögler
+Christian Köhnenkamp
+Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com>
+Christopher Nielsen <62156882+mascguy@users.noreply.github.com>
+Clark Saben <76020733+csaben@users.noreply.github.com>
+Clint Herron
+Conrad Kramer
+Corentin REGAL
+CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
+Csaba Kecskemeti
+Cuong Trinh Manh
+DAN™
+Damian Stewart
+Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
+Dan Johansson
+Dane Madsen
+DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com>
+Daniel Bevenius
+Daniel Drake
+Daniel Hiltgen
+Daniel Illescas Romero
+Daniel Kleine <53251018+d-kleine@users.noreply.github.com>
+Daniele <57776841+daniandtheweb@users.noreply.github.com>
+DannyDaemonic
+Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
+Dave
+Dave Airlie
+Dave Airlie
+Dave Della Costa
+David Friehs
+David Kennedy
+David Pflug
+David Renshaw
+David Sommers <12738+databyte@users.noreply.github.com>
+David Yang
+DavidKorczynski
+Dawid Potocki
+Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com>
+Dean
+Deins
+Denis Spasyuk <34203011+dspasyuk@users.noreply.github.com>
+Derrick T. Woolworth
+Deven Mistry <31466137+deven367@users.noreply.github.com>
+Dibakar Gope
+Didzis Gosko
+Diego Devesa
+Diogo Teles Sant'Anna
+Djip007 <3705339+Djip007@users.noreply.github.com>
+Djip007
+Don Mahurin
+DooWoong Lee (David)
+Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com>
+Dou Xinpeng <15529241576@163.com>
+Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
+Douglas Hanley
+Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
+Ebey Abraham
+Echo Nolan
+Ed Lee
+Ed Lepedus
+Eddie-Wang
+Edward Taylor
+Elaine
+Elbios <141279586+Elbios@users.noreply.github.com>
+Elton Kola
+Emreerdog <34742675+Emreerdog@users.noreply.github.com>
+Engininja2 <139037756+Engininja2@users.noreply.github.com>
+Equim
+Eric Curtin
+Eric Curtin
+Eric Sommerlade
+Eric Zhang <34133756+EZForever@users.noreply.github.com>
+Erik Garrison
+Erik Scholz
+Esko Toivonen
+Ettore Di Giacinto
+Evan Jones
+Evan Miller
+Eve <139727413+netrunnereve@users.noreply.github.com>
+Evgeny Kurnevsky
+Ewout ter Hoeven
+ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
+FK
+Fabian
+Fabio R. Sluzala
+Faez Shakil
+Faisal Zaghloul
+Faisal Zaghloul
+Fan Shupei
+FantasyGmm <16450052+FantasyGmm@users.noreply.github.com>
+Farbod Bijary <110523279+farbodbj@users.noreply.github.com>
+Fattire <528174+fat-tire@users.noreply.github.com>
+Felix
+Finn Voorhees
+Firat
+FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
+Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
+Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
+Francisco Melo <43780565+francis2tm@users.noreply.github.com>
+Frank Mai
+FrankHB
+Frankie Robertson
+Fred Douglas <43351173+fredlas@users.noreply.github.com>
+Frederik Vogel
+Gabe Goodhart
+Gabe Goodhart
+Gaetan Bisson
+GainLee
+Galunid
+Gary Linscott
+Gary Mulder
+Gavin Zhao
+Genkagaku.GPT
+Georgi Gerganov
+Gilad S
+Gilad S. <7817232+giladgd@users.noreply.github.com>
+Giuseppe Scrivano
+GiviMAD
+Govlzkoy
+Guillaume "Vermeille" Sanchez
+Guillaume Wenzek
+Guoliang Hua <32868157+nbcsm@users.noreply.github.com>
+Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
+Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com>
+Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
+Haggai Nuchi
+Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
+Hamdoud Hakem <90524568+hamdoudhakem@users.noreply.github.com>
+HanishKVC
+Haohui Mai
+Haoxiang Fei
+Harald Fernengel
+Hatsune Miku <129688334+at8u@users.noreply.github.com>
+HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com>
+Haus1
+Henk Poley
+Henri Vasserman
+Henrik Forstén
+Herman Semenov
+Hesen Peng
+HimariO
+Hoang Nguyen
+Hong Bo PENG
+Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
+Howard Su
+Hua Jiang
+Huang Qi
+Huawei Lin
+Hugo Roussel
+Huifeng Ou <79071290+ho2103@users.noreply.github.com>
+Ian Bull
+Ian Bull
+Ian Scrivener
+Icecream95
+Ido S
+IgnacioFDM
+Igor Okulist
+Ihar Hrachyshka
+Ikko Eltociear Ashimine
+Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com>
+Ionoclast Laboratories
+Isaac McFadyen
+IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com>
+Ivan
+Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
+Ivan Komarov
+Ivan Stepanov
+JFLFY2255
+JH23X <165871467+JH23X@users.noreply.github.com>
+Jack Mousseau
+Jack Mousseau
+JackJollimore <130917767+JackJollimore@users.noreply.github.com>
+Jaeden Amero
+Jaemin Son
+Jafar Uruç
+Jag Chadha
+Jakub N
+James A Capozzoli <157492257+jac-jim@users.noreply.github.com>
+James Reynolds
+Jan Boon
+Jan Boon
+Jan Ploski
+Jannis Schönleber
+Jared Van Bortel
+Jared Van Bortel
+Jason McCartney
+Jason Stillerman
+Jean-Christophe Hoelt
+Jean-Michaël Celerier
+Jed Fox
+Jeff Bolz
+Jeffrey Morgan