diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..74949df0fb6e4b22f89b922b547c821aad437e14 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/llm.cc.obj filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-build/ctransformers.exp filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-build/ctransformers.lib filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-build/lib/ctransformers.dll filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.so filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.so filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/local/ctransformers.dll filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/libctransformers.so filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/libctransformers.so filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/local/ctransformers.dll filter=lfs diff=lfs merge=lfs -text +ctransformers/ctransformers/lib/avx/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/ctransformers/lib/avx/libctransformers.so filter=lfs diff=lfs merge=lfs -text +ctransformers/ctransformers/lib/avx2/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/ctransformers/lib/avx2/libctransformers.so filter=lfs diff=lfs merge=lfs -text +ctransformers/ctransformers/lib/basic/libctransformers.dylib filter=lfs diff=lfs merge=lfs -text +ctransformers/models/submodules/ggml/examples/mnist/models/mnist/mnist_model.state_dict filter=lfs diff=lfs merge=lfs -text +ctransformers/models/submodules/ggml/examples/mnist/models/mnist/t10k-images.idx3-ubyte filter=lfs diff=lfs merge=lfs -text diff --git a/ctransformers/.gitattributes b/ctransformers/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..205021e49dd1f7342400b8e1254cc4e310fcc4a1 --- /dev/null +++ b/ctransformers/.gitattributes @@ -0,0 +1,2 @@ +# Enforce Unix newlines +* text=auto eol=lf diff --git a/ctransformers/.github/workflows/build.yml b/ctransformers/.github/workflows/build.yml new file mode 100644 index 0000000000000000000000000000000000000000..d3416503908baa35d415e359f26c183e1de4a335 --- /dev/null +++ b/ctransformers/.github/workflows/build.yml @@ -0,0 +1,67 @@ +name: build + +on: + push: + paths: + - 'models/**' + - CMakeLists.txt + branches: + - main + tags-ignore: + - '**' + workflow_dispatch: + +jobs: + build: + name: ${{ matrix.os }} ${{ matrix.instructions }} + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + os: + - ubuntu-20.04 + - macos-latest + - windows-latest + instructions: + - avx2 + - avx + - basic + + steps: + - uses: actions/checkout@v3 + + - name: Build + run: | + cmake -B build -DCT_INSTRUCTIONS=${{ matrix.instructions }} + cmake --build build --config Release + + mkdir tmp + mv build/lib tmp/${{ matrix.instructions }} + + - uses: actions/upload-artifact@v3 + with: + name: libraries + path: tmp + if-no-files-found: error + + finish: + needs: build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + ssh-key: ${{ secrets.DEPLOY_KEY }} + + - uses: actions/download-artifact@v3 + with: + name: libraries + path: ctransformers/lib + + - uses: git-actions/set-user@v1 + + - name: Save + run: | + git add ctransformers/lib + git commit -m 'Auto Build' + git push diff --git a/ctransformers/.github/workflows/tests.yml b/ctransformers/.github/workflows/tests.yml new file mode 100644 index 0000000000000000000000000000000000000000..c4100f53b0c8008d4b650db6a6cf62252a84e420 --- /dev/null +++ b/ctransformers/.github/workflows/tests.yml @@ -0,0 +1,47 @@ +name: tests + +on: + push: + paths: + - 'ctransformers/**' + - 'tests/**' + workflow_dispatch: + +jobs: + build: + name: Python ${{ matrix.python }} on ${{ matrix.os }} ${{ matrix.instructions }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-20.04 + - macos-latest + - windows-latest + instructions: + - avx2 + - avx + - basic + python: + - '3.8' + - '3.9' + - '3.10' + - '3.11' + exclude: + - os: macos-latest + instructions: avx2 + + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + pip install huggingface-hub pytest + + - name: Test + run: pytest tests --lib ${{ matrix.instructions }} diff --git a/ctransformers/.gitignore b/ctransformers/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ca69205192ee784b873e937410cb5c5b4e6bfd74 --- /dev/null +++ b/ctransformers/.gitignore @@ -0,0 +1,754 @@ +tmp +*.bin + +# Created by https://www.toptal.com/developers/gitignore/api/c++,python,cmake,linux,macos,windows,sublimetext,vim,visualstudio,visualstudiocode +# Edit at https://www.toptal.com/developers/gitignore?templates=c++,python,cmake,linux,macos,windows,sublimetext,vim,visualstudio,visualstudiocode + +### C++ ### +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +### CMake ### +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps + +### CMake Patch ### +# External projects +*-prefix/ + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### SublimeText ### +# Cache files for Sublime Text +*.tmlanguage.cache +*.tmPreferences.cache +*.stTheme.cache + +# Workspace files are user-specific +*.sublime-workspace + +# Project files should be checked into the repository, unless a significant +# proportion of contributors will probably not be using Sublime Text +# *.sublime-project + +# SFTP configuration file +sftp-config.json +sftp-config-alt*.json + +# Package control specific files +Package Control.last-run +Package Control.ca-list +Package Control.ca-bundle +Package Control.system-ca-bundle +Package Control.cache/ +Package Control.ca-certs/ +Package Control.merged-ca-bundle +Package Control.user-ca-bundle +oscrypto-ca-bundle.crt +bh_unicode_properties.cache + +# Sublime-github package stores a github token in this file +# https://packagecontrol.io/packages/sublime-github +GitHub.sublime-settings + +### Vim ### +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.iobj +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +*.code-workspace + +# Local History for Visual Studio Code + +# Windows Installer files from build outputs + +# JetBrains Rider +*.sln.iml + +### VisualStudio Patch ### +# Additional files built by Visual Studio + +# End of https://www.toptal.com/developers/gitignore/api/c++,python,cmake,linux,macos,windows,sublimetext,vim,visualstudio,visualstudiocode diff --git a/ctransformers/.gitmodules b/ctransformers/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..eafcddfb529fb49c66731ef0412eb14c934fa261 --- /dev/null +++ b/ctransformers/.gitmodules @@ -0,0 +1,9 @@ +[submodule "models/ggml"] + path = models/submodules/ggml + url = https://github.com/ggerganov/ggml +[submodule "models/llama.cpp"] + path = models/submodules/llama.cpp + url = https://github.com/ggerganov/llama.cpp +[submodule "models/submodules/ggllm.cpp"] + path = models/submodules/ggllm.cpp + url = https://github.com/cmp-nct/ggllm.cpp diff --git a/ctransformers/CMakeLists.txt b/ctransformers/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a318830865a5e243c86c67b61a5126c7e3a7b0c9 --- /dev/null +++ b/ctransformers/CMakeLists.txt @@ -0,0 +1,144 @@ +cmake_minimum_required(VERSION 3.17) +project(ctransformers C CXX) + +set(CT_INSTRUCTIONS "avx2" CACHE STRING "avx2 | avx | basic") + +option(CT_CUBLAS "Use cuBLAS" OFF) +set(CT_CUDA_DMMV_X "32" CACHE STRING "x stride for dmmv CUDA kernels") +set(CT_CUDA_DMMV_Y "1" CACHE STRING "y block size for dmmv CUDA kernels") +set(CT_CUDA_KQUANTS_ITER "2" CACHE STRING "iters/thread per block for Q2_K/Q6_K") + +message(STATUS "CT_INSTRUCTIONS: ${CT_INSTRUCTIONS}") +message(STATUS "CT_CUBLAS: ${CT_CUBLAS}") + +set(BUILD_SHARED_LIBS ON) +set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/$<0:>) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/$<0:>) + +# Compile Flags + +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo") +endif() + +if (NOT MSVC) + set(c_flags + -Wall + -Wextra + -Wpedantic + -Wcast-qual + -Wdouble-promotion + -Wshadow + -Wstrict-prototypes + -Wpointer-arith + ) + set(cxx_flags + # TODO(marella): Add other warnings. + # -Wall + -Wextra + -Wpedantic + -Wcast-qual + -Wno-unused-function + -Wno-multichar + ) +endif() + +add_compile_options( + "$<$:${c_flags}>" + "$<$:${cxx_flags}>" +) + +# Architecture Flags + +if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + message(STATUS "ARM detected") + if (NOT MSVC) + add_compile_options(-mcpu=native) + endif() +else() + message(STATUS "x86 detected") + if (APPLE) + # Universal binary. + set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE) + endif() + + if (MSVC) + if (CT_INSTRUCTIONS STREQUAL "avx2") + add_compile_options($<$:/arch:AVX2>) + add_compile_options($<$:/arch:AVX2>) + elseif (CT_INSTRUCTIONS STREQUAL "avx") + add_compile_options($<$:/arch:AVX>) + add_compile_options($<$:/arch:AVX>) + endif() + else() + if (CT_INSTRUCTIONS STREQUAL "avx2") + add_compile_options(-mfma -mavx2) + add_compile_options(-mf16c -mavx) + elseif (CT_INSTRUCTIONS STREQUAL "avx") + add_compile_options(-mf16c -mavx) + endif() + endif() +endif() + +# Library + +add_library( + ctransformers SHARED + models/llm.cc + models/ggml/ggml.c + models/ggml/k_quants.c +) + +target_include_directories(ctransformers PRIVATE models) +target_link_libraries(ctransformers PRIVATE Threads::Threads) +set_target_properties(ctransformers PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_compile_definitions(ctransformers PRIVATE GGML_USE_K_QUANTS) + +if (APPLE) + find_library(ACCELERATE_FRAMEWORK Accelerate) + if (ACCELERATE_FRAMEWORK) + message(STATUS "Accelerate framework found") + target_link_libraries(ctransformers PRIVATE ${ACCELERATE_FRAMEWORK}) + target_compile_definitions(ctransformers PRIVATE GGML_USE_ACCELERATE) + else() + message(WARNING "Accelerate framework not found") + endif() +endif() + +if (CT_CUBLAS) + find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) + message(STATUS "cuBLAS found") + enable_language(CUDA) + + target_sources(ctransformers PRIVATE models/ggml/ggml-cuda.cu) + target_link_libraries(ctransformers PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt) + + target_compile_definitions(ctransformers PRIVATE GGML_USE_CUBLAS) + target_compile_definitions(ctransformers PRIVATE GGML_CUDA_DMMV_X=${CT_CUDA_DMMV_X}) + target_compile_definitions(ctransformers PRIVATE GGML_CUDA_DMMV_Y=${CT_CUDA_DMMV_Y}) + target_compile_definitions(ctransformers PRIVATE K_QUANTS_PER_ITERATION=${CT_CUDA_KQUANTS_ITER}) + + set_property(TARGET ctransformers PROPERTY CUDA_ARCHITECTURES OFF) + set_property(TARGET ctransformers PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto") + else() + message(WARNING "cuBLAS not found") + endif() +endif() + +# scikit-build + +install( + TARGETS ctransformers + LIBRARY DESTINATION ctransformers/lib/local + RUNTIME DESTINATION ctransformers/lib/local +) diff --git a/ctransformers/LICENSE b/ctransformers/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..336b1f83e5d4d30bc9c7be578b83dc21664ebe65 --- /dev/null +++ b/ctransformers/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) Ravindra Marella + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/ctransformers/README.md b/ctransformers/README.md new file mode 100644 index 0000000000000000000000000000000000000000..219d0a557b01d0bbe920286e4d2ded622d1c677f --- /dev/null +++ b/ctransformers/README.md @@ -0,0 +1,495 @@ +# [C Transformers](https://github.com/marella/ctransformers) [![PyPI](https://img.shields.io/pypi/v/ctransformers)](https://pypi.org/project/ctransformers/) [![tests](https://github.com/marella/ctransformers/actions/workflows/tests.yml/badge.svg)](https://github.com/marella/ctransformers/actions/workflows/tests.yml) [![build](https://github.com/marella/ctransformers/actions/workflows/build.yml/badge.svg)](https://github.com/marella/ctransformers/actions/workflows/build.yml) + +Python bindings for the Transformer models implemented in C/C++ using [GGML](https://github.com/ggerganov/ggml) library. + +> Also see [ChatDocs](https://github.com/marella/chatdocs) + +- [Supported Models](#supported-models) +- [Installation](#installation) +- [Usage](#usage) + - [Hugging Face Hub](#hugging-face-hub) + - [LangChain](#langchain) + - [GPU](#gpu) +- [Documentation](#documentation) +- [License](#license) + +## Supported Models + +| Models | Model Type | +| :-------------------- | ----------- | +| GPT-2 | `gpt2` | +| GPT-J, GPT4All-J | `gptj` | +| GPT-NeoX, StableLM | `gpt_neox` | +| LLaMA | `llama` | +| MPT | `mpt` | +| Dolly V2 | `dolly-v2` | +| StarCoder, StarChat | `starcoder` | +| Falcon (Experimental) | `falcon` | + +## Installation + +```sh +pip install ctransformers +``` + +For GPU (CUDA) support, set environment variable `CT_CUBLAS=1` and install from source using: + +```sh +CT_CUBLAS=1 pip install ctransformers --no-binary ctransformers +``` + +
+Show commands for Windows
+ +On Windows PowerShell run: + +```sh +$env:CT_CUBLAS=1 +pip install ctransformers --no-binary ctransformers +``` + +On Windows Command Prompt run: + +```sh +set CT_CUBLAS=1 +pip install ctransformers --no-binary ctransformers +``` + +
+ +## Usage + +It provides a unified interface for all models: + +```py +from ctransformers import AutoModelForCausalLM + +llm = AutoModelForCausalLM.from_pretrained('/path/to/ggml-gpt-2.bin', model_type='gpt2') + +print(llm('AI is going to')) +``` + +[Run in Google Colab](https://colab.research.google.com/drive/1GMhYMUAv_TyZkpfvUI1NirM8-9mCXQyL) + +If you are getting `illegal instruction` error, try using `lib='avx'` or `lib='basic'`: + +```py +llm = AutoModelForCausalLM.from_pretrained('/path/to/ggml-gpt-2.bin', model_type='gpt2', lib='avx') +``` + +It provides a generator interface for more control: + +```py +tokens = llm.tokenize('AI is going to') + +for token in llm.generate(tokens): + print(llm.detokenize(token)) +``` + +It can be used with a custom or Hugging Face tokenizer: + +```py +from transformers import AutoTokenizer + +tokenizer = AutoTokenizer.from_pretrained('gpt2') + +tokens = tokenizer.encode('AI is going to') + +for token in llm.generate(tokens): + print(tokenizer.decode(token)) +``` + +It also provides access to the low-level C API. See [Documentation](#documentation) section below. + +### Hugging Face Hub + +It can be used with models hosted on the Hub: + +```py +llm = AutoModelForCausalLM.from_pretrained('marella/gpt-2-ggml') +``` + +If a model repo has multiple model files (`.bin` files), specify a model file using: + +```py +llm = AutoModelForCausalLM.from_pretrained('marella/gpt-2-ggml', model_file='ggml-model.bin') +``` + +It can be used with your own models uploaded on the Hub. For better user experience, upload only one model per repo. + +To use it with your own model, add `config.json` file to your model repo specifying the `model_type`: + +```json +{ + "model_type": "gpt2" +} +``` + +You can also specify additional parameters under `task_specific_params.text-generation`. + +See [marella/gpt-2-ggml](https://huggingface.co/marella/gpt-2-ggml/blob/main/config.json) for a minimal example and [marella/gpt-2-ggml-example](https://huggingface.co/marella/gpt-2-ggml-example/blob/main/config.json) for a full example. + +### LangChain + +It is integrated into LangChain. See [LangChain docs](https://python.langchain.com/docs/ecosystem/integrations/ctransformers). + +### GPU + +> **Note:** Currently only LLaMA models have GPU support. + +To run some of the model layers on GPU, set the `gpu_layers` parameter: + +```py +llm = AutoModelForCausalLM.from_pretrained('/path/to/ggml-llama.bin', model_type='llama', gpu_layers=50) +``` + +[Run in Google Colab](https://colab.research.google.com/drive/1Ihn7iPCYiqlTotpkqa1tOhUIpJBrJ1Tp) + +## Documentation + + + +### Config + +| Parameter | Type | Description | Default | +| :------------------- | :---------- | :------------------------------------------------------- | :------ | +| `top_k` | `int` | The top-k value to use for sampling. | `40` | +| `top_p` | `float` | The top-p value to use for sampling. | `0.95` | +| `temperature` | `float` | The temperature to use for sampling. | `0.8` | +| `repetition_penalty` | `float` | The repetition penalty to use for sampling. | `1.1` | +| `last_n_tokens` | `int` | The number of last tokens to use for repetition penalty. | `64` | +| `seed` | `int` | The seed value to use for sampling tokens. | `-1` | +| `max_new_tokens` | `int` | The maximum number of new tokens to generate. | `256` | +| `stop` | `List[str]` | A list of sequences to stop generation when encountered. | `None` | +| `stream` | `bool` | Whether to stream the generated text. | `False` | +| `reset` | `bool` | Whether to reset the model state before generating text. | `True` | +| `batch_size` | `int` | The batch size to use for evaluating tokens. | `8` | +| `threads` | `int` | The number of threads to use for evaluating tokens. | `-1` | +| `context_length` | `int` | The maximum context length to use. | `-1` | +| `gpu_layers` | `int` | The number of layers to run on GPU. | `0` | + +> **Note:** Currently only LLaMA and MPT models support the `context_length` parameter and only LLaMA models support the `gpu_layers` parameter. + +### class `AutoModelForCausalLM` + +--- + +#### classmethod `AutoModelForCausalLM.from_pretrained` + +```python +from_pretrained( + model_path_or_repo_id: str, + model_type: Optional[str] = None, + model_file: Optional[str] = None, + config: Optional[ctransformers.hub.AutoConfig] = None, + lib: Optional[str] = None, + local_files_only: bool = False, + **kwargs +) → LLM +``` + +Loads the language model from a local file or remote repo. + +**Args:** + +- `model_path_or_repo_id`: The path to a model file or directory or the name of a Hugging Face Hub model repo. +- `model_type`: The model type. +- `model_file`: The name of the model file in repo or directory. +- `config`: `AutoConfig` object. +- `lib`: The path to a shared library or one of `avx2`, `avx`, `basic`. +- `local_files_only`: Whether or not to only look at local files (i.e., do not try to download the model). + +**Returns:** +`LLM` object. + +### class `LLM` + +### method `LLM.__init__` + +```python +__init__( + model_path: str, + model_type: str, + config: Optional[ctransformers.llm.Config] = None, + lib: Optional[str] = None +) +``` + +Loads the language model from a local file. + +**Args:** + +- `model_path`: The path to a model file. +- `model_type`: The model type. +- `config`: `Config` object. +- `lib`: The path to a shared library or one of `avx2`, `avx`, `basic`. + +--- + +##### property LLM.config + +The config object. + +--- + +##### property LLM.context_length + +The context length of model. + +--- + +##### property LLM.embeddings + +The input embeddings. + +--- + +##### property LLM.eos_token_id + +The end-of-sequence token. + +--- + +##### property LLM.logits + +The unnormalized log probabilities. + +--- + +##### property LLM.model_path + +The path to the model file. + +--- + +##### property LLM.model_type + +The model type. + +--- + +##### property LLM.vocab_size + +The number of tokens in vocabulary. + +--- + +#### method `LLM.detokenize` + +```python +detokenize(tokens: Sequence[int], decode: bool = True) → Union[str, bytes] +``` + +Converts a list of tokens to text. + +**Args:** + +- `tokens`: The list of tokens. +- `decode`: Whether to decode the text as UTF-8 string. + +**Returns:** +The combined text of all tokens. + +--- + +#### method `LLM.embed` + +```python +embed( + input: Union[str, Sequence[int]], + batch_size: Optional[int] = None, + threads: Optional[int] = None +) → List[float] +``` + +Computes embeddings for a text or list of tokens. + +> **Note:** Currently only LLaMA models support embeddings. + +**Args:** + +- `input`: The input text or list of tokens to get embeddings for. +- `batch_size`: The batch size to use for evaluating tokens. Default: `8` +- `threads`: The number of threads to use for evaluating tokens. Default: `-1` + +**Returns:** +The input embeddings. + +--- + +#### method `LLM.eval` + +```python +eval( + tokens: Sequence[int], + batch_size: Optional[int] = None, + threads: Optional[int] = None +) → None +``` + +Evaluates a list of tokens. + +**Args:** + +- `tokens`: The list of tokens to evaluate. +- `batch_size`: The batch size to use for evaluating tokens. Default: `8` +- `threads`: The number of threads to use for evaluating tokens. Default: `-1` + +--- + +#### method `LLM.generate` + +```python +generate( + tokens: Sequence[int], + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + reset: Optional[bool] = None +) → Generator[int, NoneType, NoneType] +``` + +Generates new tokens from a list of tokens. + +**Args:** + +- `tokens`: The list of tokens to generate tokens from. +- `top_k`: The top-k value to use for sampling. Default: `40` +- `top_p`: The top-p value to use for sampling. Default: `0.95` +- `temperature`: The temperature to use for sampling. Default: `0.8` +- `repetition_penalty`: The repetition penalty to use for sampling. Default: `1.1` +- `last_n_tokens`: The number of last tokens to use for repetition penalty. Default: `64` +- `seed`: The seed value to use for sampling tokens. Default: `-1` +- `batch_size`: The batch size to use for evaluating tokens. Default: `8` +- `threads`: The number of threads to use for evaluating tokens. Default: `-1` +- `reset`: Whether to reset the model state before generating text. Default: `True` + +**Returns:** +The generated tokens. + +--- + +#### method `LLM.is_eos_token` + +```python +is_eos_token(token: int) → bool +``` + +Checks if a token is an end-of-sequence token. + +**Args:** + +- `token`: The token to check. + +**Returns:** +`True` if the token is an end-of-sequence token else `False`. + +--- + +#### method `LLM.reset` + +```python +reset() → None +``` + +Resets the model state. + +--- + +#### method `LLM.sample` + +```python +sample( + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None +) → int +``` + +Samples a token from the model. + +**Args:** + +- `top_k`: The top-k value to use for sampling. Default: `40` +- `top_p`: The top-p value to use for sampling. Default: `0.95` +- `temperature`: The temperature to use for sampling. Default: `0.8` +- `repetition_penalty`: The repetition penalty to use for sampling. Default: `1.1` +- `last_n_tokens`: The number of last tokens to use for repetition penalty. Default: `64` +- `seed`: The seed value to use for sampling tokens. Default: `-1` + +**Returns:** +The sampled token. + +--- + +#### method `LLM.tokenize` + +```python +tokenize(text: str) → List[int] +``` + +Converts a text into list of tokens. + +**Args:** + +- `text`: The text to tokenize. + +**Returns:** +The list of tokens. + +--- + +#### method `LLM.__call__` + +```python +__call__( + prompt: str, + max_new_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + stop: Optional[Sequence[str]] = None, + stream: Optional[bool] = None, + reset: Optional[bool] = None +) → Union[str, Generator[str, NoneType, NoneType]] +``` + +Generates text from a prompt. + +**Args:** + +- `prompt`: The prompt to generate text from. +- `max_new_tokens`: The maximum number of new tokens to generate. Default: `256` +- `top_k`: The top-k value to use for sampling. Default: `40` +- `top_p`: The top-p value to use for sampling. Default: `0.95` +- `temperature`: The temperature to use for sampling. Default: `0.8` +- `repetition_penalty`: The repetition penalty to use for sampling. Default: `1.1` +- `last_n_tokens`: The number of last tokens to use for repetition penalty. Default: `64` +- `seed`: The seed value to use for sampling tokens. Default: `-1` +- `batch_size`: The batch size to use for evaluating tokens. Default: `8` +- `threads`: The number of threads to use for evaluating tokens. Default: `-1` +- `stop`: A list of sequences to stop generation when encountered. Default: `None` +- `stream`: Whether to stream the generated text. Default: `False` +- `reset`: Whether to reset the model state before generating text. Default: `True` + +**Returns:** +The generated text. + + + +## License + +[MIT](https://github.com/marella/ctransformers/blob/main/LICENSE) diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/.ninja_deps b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/.ninja_deps new file mode 100644 index 0000000000000000000000000000000000000000..6da9bc134e1446783caa60e969bcf31d93048cf0 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/.ninja_deps differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/.ninja_log b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/.ninja_log new file mode 100644 index 0000000000000000000000000000000000000000..f06912798286c8d96c24b54835930b14e38d57d9 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/.ninja_log @@ -0,0 +1,7 @@ +# ninja log v5 +12 452 7097429580778985 CMakeFiles/ctransformers.dir/models/ggml/k_quants.c.obj 913e97d76831f776 +7 2037 7097429596632891 CMakeFiles/ctransformers.dir/models/ggml/ggml.c.obj c709d27b2b5bc614 +3 4948 7097429625716154 CMakeFiles/ctransformers.dir/models/llm.cc.obj e8880d324cc1df6 +4949 5411 7097429630446119 lib/ctransformers.dll e53b80a43596c6a5 +4949 5411 7097429630446119 ctransformers.lib e53b80a43596c6a5 +5411 5453 0 CMakeFiles/install.util 94078bbc8092ca8e diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeCache.txt b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeCache.txt new file mode 100644 index 0000000000000000000000000000000000000000..41bc685cb34c32f411e07bae4c92fc4715949048 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeCache.txt @@ -0,0 +1,419 @@ +# This is the CMakeCache file. +# For build in directory: d:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build +# It was generated by CMake: C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/bin/cmake.exe +# You can edit this file to change values found and used by cmake. +# If you do not want to change any of the values, simply exit the editor. +# If you do want to change a value, simply edit, save, and exit the editor. +# The syntax for the file is as follows: +# KEY:TYPE=VALUE +# KEY is the name of a variable in the cache. +# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!. +# VALUE is the current value for the KEY. + +######################## +# EXTERNAL cache entries +######################## + +//Path to a program. +CMAKE_AR:FILEPATH=C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/lib.exe + +//No help, variable specified on the command line. +CMAKE_BUILD_TYPE:STRING=Release + +//CXX compiler +CMAKE_CXX_COMPILER:FILEPATH=C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/cl.exe + +//Flags used by the CXX compiler during all build types. +CMAKE_CXX_FLAGS:STRING=/DWIN32 /D_WINDOWS /GR /EHsc + +//Flags used by the CXX compiler during DEBUG builds. +CMAKE_CXX_FLAGS_DEBUG:STRING=/Zi /Ob0 /Od /RTC1 + +//Flags used by the CXX compiler during MINSIZEREL builds. +CMAKE_CXX_FLAGS_MINSIZEREL:STRING=/O1 /Ob1 /DNDEBUG + +//Flags used by the CXX compiler during RELEASE builds. +CMAKE_CXX_FLAGS_RELEASE:STRING=/O2 /Ob2 /DNDEBUG + +//Flags used by the CXX compiler during RELWITHDEBINFO builds. +CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=/Zi /O2 /Ob1 /DNDEBUG + +//Libraries linked by default with all C++ applications. +CMAKE_CXX_STANDARD_LIBRARIES:STRING=kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib + +//C compiler +CMAKE_C_COMPILER:FILEPATH=C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/cl.exe + +//Flags used by the C compiler during all build types. +CMAKE_C_FLAGS:STRING=/DWIN32 /D_WINDOWS + +//Flags used by the C compiler during DEBUG builds. +CMAKE_C_FLAGS_DEBUG:STRING=/Zi /Ob0 /Od /RTC1 + +//Flags used by the C compiler during MINSIZEREL builds. +CMAKE_C_FLAGS_MINSIZEREL:STRING=/O1 /Ob1 /DNDEBUG + +//Flags used by the C compiler during RELEASE builds. +CMAKE_C_FLAGS_RELEASE:STRING=/O2 /Ob2 /DNDEBUG + +//Flags used by the C compiler during RELWITHDEBINFO builds. +CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=/Zi /O2 /Ob1 /DNDEBUG + +//Libraries linked by default with all C applications. +CMAKE_C_STANDARD_LIBRARIES:STRING=kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib + +//Flags used by the linker during all build types. +CMAKE_EXE_LINKER_FLAGS:STRING=/machine:x64 + +//Flags used by the linker during DEBUG builds. +CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING=/debug /INCREMENTAL + +//Flags used by the linker during MINSIZEREL builds. +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING=/INCREMENTAL:NO + +//Flags used by the linker during RELEASE builds. +CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING=/INCREMENTAL:NO + +//Flags used by the linker during RELWITHDEBINFO builds. +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING=/debug /INCREMENTAL + +//Enable/Disable output of compile commands during generation. +CMAKE_EXPORT_COMPILE_COMMANDS:BOOL= + +//Value Computed by CMake. +CMAKE_FIND_PACKAGE_REDIRECTS_DIR:STATIC=D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/pkgRedirects + +//No help, variable specified on the command line. +CMAKE_INSTALL_PREFIX:PATH=D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-install + +//Path to a program. +CMAKE_LINKER:FILEPATH=C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/link.exe + +//No help, variable specified on the command line. +CMAKE_MAKE_PROGRAM:FILEPATH=C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/ninja/data/bin/ninja + +//Flags used by the linker during the creation of modules during +// all build types. +CMAKE_MODULE_LINKER_FLAGS:STRING=/machine:x64 + +//Flags used by the linker during the creation of modules during +// DEBUG builds. +CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING=/debug /INCREMENTAL + +//Flags used by the linker during the creation of modules during +// MINSIZEREL builds. +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING=/INCREMENTAL:NO + +//Flags used by the linker during the creation of modules during +// RELEASE builds. +CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING=/INCREMENTAL:NO + +//Flags used by the linker during the creation of modules during +// RELWITHDEBINFO builds. +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING=/debug /INCREMENTAL + +//No help, variable specified on the command line. +CMAKE_MODULE_PATH:PATH=C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/skbuild/resources/cmake + +//Path to a program. +CMAKE_MT:FILEPATH=C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x86/mt.exe + +//Value Computed by CMake +CMAKE_PROJECT_DESCRIPTION:STATIC= + +//Value Computed by CMake +CMAKE_PROJECT_HOMEPAGE_URL:STATIC= + +//Value Computed by CMake +CMAKE_PROJECT_NAME:STATIC=ctransformers + +//RC compiler +CMAKE_RC_COMPILER:FILEPATH=C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x86/rc.exe + +//Flags for Windows Resource Compiler during all build types. +CMAKE_RC_FLAGS:STRING=-DWIN32 + +//Flags for Windows Resource Compiler during DEBUG builds. +CMAKE_RC_FLAGS_DEBUG:STRING=-D_DEBUG + +//Flags for Windows Resource Compiler during MINSIZEREL builds. +CMAKE_RC_FLAGS_MINSIZEREL:STRING= + +//Flags for Windows Resource Compiler during RELEASE builds. +CMAKE_RC_FLAGS_RELEASE:STRING= + +//Flags for Windows Resource Compiler during RELWITHDEBINFO builds. +CMAKE_RC_FLAGS_RELWITHDEBINFO:STRING= + +//Flags used by the linker during the creation of shared libraries +// during all build types. +CMAKE_SHARED_LINKER_FLAGS:STRING=/machine:x64 + +//Flags used by the linker during the creation of shared libraries +// during DEBUG builds. +CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING=/debug /INCREMENTAL + +//Flags used by the linker during the creation of shared libraries +// during MINSIZEREL builds. +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING=/INCREMENTAL:NO + +//Flags used by the linker during the creation of shared libraries +// during RELEASE builds. +CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING=/INCREMENTAL:NO + +//Flags used by the linker during the creation of shared libraries +// during RELWITHDEBINFO builds. +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING=/debug /INCREMENTAL + +//If set, runtime paths are not added when installing shared libraries, +// but are added when building. +CMAKE_SKIP_INSTALL_RPATH:BOOL=NO + +//If set, runtime paths are not added when using shared libraries. +CMAKE_SKIP_RPATH:BOOL=NO + +//Flags used by the linker during the creation of static libraries +// during all build types. +CMAKE_STATIC_LINKER_FLAGS:STRING=/machine:x64 + +//Flags used by the linker during the creation of static libraries +// during DEBUG builds. +CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during the creation of static libraries +// during MINSIZEREL builds. +CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during the creation of static libraries +// during RELEASE builds. +CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during the creation of static libraries +// during RELWITHDEBINFO builds. +CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//If this value is on, makefiles will be generated without the +// .SILENT directive, and all commands will be echoed to the console +// during the make. This is useful for debugging only. With Visual +// Studio IDE projects all commands are done without /nologo. +CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE + +//Use cuBLAS +CT_CUBLAS:BOOL=OFF + +//x stride for dmmv CUDA kernels +CT_CUDA_DMMV_X:STRING=32 + +//y block size for dmmv CUDA kernels +CT_CUDA_DMMV_Y:STRING=1 + +//iters/thread per block for Q2_K/Q6_K +CT_CUDA_KQUANTS_ITER:STRING=2 + +//avx2 | avx | basic +CT_INSTRUCTIONS:STRING=avx2 + +//No help, variable specified on the command line. +PYTHON_EXECUTABLE:PATH=D:/Projects/Replit 3b/replit-3B-inference/env/Scripts/python.exe + +//No help, variable specified on the command line. +PYTHON_INCLUDE_DIR:PATH=C:/Users/milin/AppData/Local/Programs/Python/Python311/Include + +//No help, variable specified on the command line. +PYTHON_LIBRARY:PATH=C:/Users/milin/AppData/Local/Programs/Python/Python311/libs/python311.lib + +//No help, variable specified on the command line. +PYTHON_VERSION_STRING:STRING=3.11.4 + +//No help, variable specified on the command line. +Python3_EXECUTABLE:PATH=D:/Projects/Replit 3b/replit-3B-inference/env/Scripts/python.exe + +//No help, variable specified on the command line. +Python3_FIND_REGISTRY:STRING=NEVER + +//No help, variable specified on the command line. +Python3_INCLUDE_DIR:PATH=C:/Users/milin/AppData/Local/Programs/Python/Python311/Include + +//No help, variable specified on the command line. +Python3_LIBRARY:PATH=C:/Users/milin/AppData/Local/Programs/Python/Python311/libs/python311.lib + +//No help, variable specified on the command line. +Python3_ROOT_DIR:PATH=D:/Projects/Replit 3b/replit-3B-inference/env + +//No help, variable specified on the command line. +Python_EXECUTABLE:PATH=D:/Projects/Replit 3b/replit-3B-inference/env/Scripts/python.exe + +//No help, variable specified on the command line. +Python_FIND_REGISTRY:STRING=NEVER + +//No help, variable specified on the command line. +Python_INCLUDE_DIR:PATH=C:/Users/milin/AppData/Local/Programs/Python/Python311/Include + +//No help, variable specified on the command line. +Python_LIBRARY:PATH=C:/Users/milin/AppData/Local/Programs/Python/Python311/libs/python311.lib + +//No help, variable specified on the command line. +Python_ROOT_DIR:PATH=D:/Projects/Replit 3b/replit-3B-inference/env + +//No help, variable specified on the command line. +_SKBUILD_FORCE_MSVC:UNINITIALIZED=1920 + +//Value Computed by CMake +ctransformers_BINARY_DIR:STATIC=D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build + +//Value Computed by CMake +ctransformers_IS_TOP_LEVEL:STATIC=ON + +//Value Computed by CMake +ctransformers_SOURCE_DIR:STATIC=D:/Projects/Replit 3b/replit-3B-inference/ctransformers + + +######################## +# INTERNAL cache entries +######################## + +//ADVANCED property for variable: CMAKE_AR +CMAKE_AR-ADVANCED:INTERNAL=1 +//This is the directory where this CMakeCache.txt was created +CMAKE_CACHEFILE_DIR:INTERNAL=d:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3 +//Minor version of cmake used to create the current loaded cache +CMAKE_CACHE_MINOR_VERSION:INTERNAL=26 +//Patch version of cmake used to create the current loaded cache +CMAKE_CACHE_PATCH_VERSION:INTERNAL=4 +//Path to CMake executable. +CMAKE_COMMAND:INTERNAL=C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/bin/cmake.exe +//Path to cpack program executable. +CMAKE_CPACK_COMMAND:INTERNAL=C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/bin/cpack.exe +//Path to ctest program executable. +CMAKE_CTEST_COMMAND:INTERNAL=C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/bin/ctest.exe +//ADVANCED property for variable: CMAKE_CXX_COMPILER +CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS +CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG +CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL +CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE +CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO +CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_STANDARD_LIBRARIES +CMAKE_CXX_STANDARD_LIBRARIES-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_COMPILER +CMAKE_C_COMPILER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS +CMAKE_C_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_DEBUG +CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_MINSIZEREL +CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_RELEASE +CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_FLAGS_RELWITHDEBINFO +CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_C_STANDARD_LIBRARIES +CMAKE_C_STANDARD_LIBRARIES-ADVANCED:INTERNAL=1 +//Path to cache edit program executable. +CMAKE_EDIT_COMMAND:INTERNAL=C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/bin/cmake-gui.exe +//Executable file format +CMAKE_EXECUTABLE_FORMAT:INTERNAL=Unknown +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS +CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG +CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE +CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS +CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1 +//Name of external makefile project generator. +CMAKE_EXTRA_GENERATOR:INTERNAL= +//Name of generator. +CMAKE_GENERATOR:INTERNAL=Ninja +//Generator instance identifier. +CMAKE_GENERATOR_INSTANCE:INTERNAL= +//Name of generator platform. +CMAKE_GENERATOR_PLATFORM:INTERNAL= +//Name of generator toolset. +CMAKE_GENERATOR_TOOLSET:INTERNAL= +//Test CMAKE_HAVE_LIBC_PTHREAD +CMAKE_HAVE_LIBC_PTHREAD:INTERNAL= +//Have library pthreads +CMAKE_HAVE_PTHREADS_CREATE:INTERNAL= +//Have library pthread +CMAKE_HAVE_PTHREAD_CREATE:INTERNAL= +//Source directory with the top level CMakeLists.txt file for this +// project +CMAKE_HOME_DIRECTORY:INTERNAL=D:/Projects/Replit 3b/replit-3B-inference/ctransformers +//ADVANCED property for variable: CMAKE_LINKER +CMAKE_LINKER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS +CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG +CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE +CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MT +CMAKE_MT-ADVANCED:INTERNAL=1 +//number of local generators +CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1 +//Platform information initialized +CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1 +//noop for ranlib +CMAKE_RANLIB:INTERNAL=: +//ADVANCED property for variable: CMAKE_RC_COMPILER +CMAKE_RC_COMPILER-ADVANCED:INTERNAL=1 +CMAKE_RC_COMPILER_WORKS:INTERNAL=1 +//ADVANCED property for variable: CMAKE_RC_FLAGS +CMAKE_RC_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_RC_FLAGS_DEBUG +CMAKE_RC_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_RC_FLAGS_MINSIZEREL +CMAKE_RC_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_RC_FLAGS_RELEASE +CMAKE_RC_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_RC_FLAGS_RELWITHDEBINFO +CMAKE_RC_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Path to CMake installation. +CMAKE_ROOT:INTERNAL=C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS +CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG +CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE +CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH +CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SKIP_RPATH +CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS +CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG +CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL +CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE +CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE +CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 +//Details about finding Threads +FIND_PACKAGE_MESSAGE_DETAILS_Threads:INTERNAL=[TRUE][v()] +//No help, variable specified on the command line. +SKBUILD:INTERNAL=TRUE + diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeCCompiler.cmake b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeCCompiler.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8a7bb57cdac3c5bfb3f1f84096558a880bddd492 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeCCompiler.cmake @@ -0,0 +1,72 @@ +set(CMAKE_C_COMPILER "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/cl.exe") +set(CMAKE_C_COMPILER_ARG1 "") +set(CMAKE_C_COMPILER_ID "MSVC") +set(CMAKE_C_COMPILER_VERSION "19.29.30151.0") +set(CMAKE_C_COMPILER_VERSION_INTERNAL "") +set(CMAKE_C_COMPILER_WRAPPER "") +set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "90") +set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "OFF") +set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17") +set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes") +set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros") +set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert") +set(CMAKE_C17_COMPILE_FEATURES "c_std_17") +set(CMAKE_C23_COMPILE_FEATURES "") + +set(CMAKE_C_PLATFORM_ID "Windows") +set(CMAKE_C_SIMULATE_ID "") +set(CMAKE_C_COMPILER_FRONTEND_VARIANT "MSVC") +set(CMAKE_C_SIMULATE_VERSION "") +set(CMAKE_C_COMPILER_ARCHITECTURE_ID x64) + +set(MSVC_C_ARCHITECTURE_ID x64) + +set(CMAKE_AR "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/lib.exe") +set(CMAKE_C_COMPILER_AR "") +set(CMAKE_RANLIB ":") +set(CMAKE_C_COMPILER_RANLIB "") +set(CMAKE_LINKER "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/link.exe") +set(CMAKE_MT "C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x86/mt.exe") +set(CMAKE_COMPILER_IS_GNUCC ) +set(CMAKE_C_COMPILER_LOADED 1) +set(CMAKE_C_COMPILER_WORKS TRUE) +set(CMAKE_C_ABI_COMPILED TRUE) + +set(CMAKE_C_COMPILER_ENV_VAR "CC") + +set(CMAKE_C_COMPILER_ID_RUN 1) +set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) +set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) +set(CMAKE_C_LINKER_PREFERENCE 10) + +# Save compiler ABI information. +set(CMAKE_C_SIZEOF_DATA_PTR "8") +set(CMAKE_C_COMPILER_ABI "") +set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN") +set(CMAKE_C_LIBRARY_ARCHITECTURE "") + +if(CMAKE_C_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_C_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") +endif() + +if(CMAKE_C_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "") +endif() + +set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "Note: including file: ") +if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + + +set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "") +set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "") +set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "") +set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeCXXCompiler.cmake b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeCXXCompiler.cmake new file mode 100644 index 0000000000000000000000000000000000000000..eb9a56fffeb01367373a9a9ba5d3f5147368647c --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeCXXCompiler.cmake @@ -0,0 +1,83 @@ +set(CMAKE_CXX_COMPILER "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/cl.exe") +set(CMAKE_CXX_COMPILER_ARG1 "") +set(CMAKE_CXX_COMPILER_ID "MSVC") +set(CMAKE_CXX_COMPILER_VERSION "19.29.30151.0") +set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") +set(CMAKE_CXX_COMPILER_WRAPPER "") +set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "14") +set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "OFF") +set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23") +set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") +set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") +set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") +set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") +set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20") +set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23") + +set(CMAKE_CXX_PLATFORM_ID "Windows") +set(CMAKE_CXX_SIMULATE_ID "") +set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "MSVC") +set(CMAKE_CXX_SIMULATE_VERSION "") +set(CMAKE_CXX_COMPILER_ARCHITECTURE_ID x64) + +set(MSVC_CXX_ARCHITECTURE_ID x64) + +set(CMAKE_AR "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/lib.exe") +set(CMAKE_CXX_COMPILER_AR "") +set(CMAKE_RANLIB ":") +set(CMAKE_CXX_COMPILER_RANLIB "") +set(CMAKE_LINKER "C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/link.exe") +set(CMAKE_MT "C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x86/mt.exe") +set(CMAKE_COMPILER_IS_GNUCXX ) +set(CMAKE_CXX_COMPILER_LOADED 1) +set(CMAKE_CXX_COMPILER_WORKS TRUE) +set(CMAKE_CXX_ABI_COMPILED TRUE) + +set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") + +set(CMAKE_CXX_COMPILER_ID_RUN 1) +set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm) +set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) + +foreach (lang C OBJC OBJCXX) + if (CMAKE_${lang}_COMPILER_ID_RUN) + foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS) + list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension}) + endforeach() + endif() +endforeach() + +set(CMAKE_CXX_LINKER_PREFERENCE 30) +set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) + +# Save compiler ABI information. +set(CMAKE_CXX_SIZEOF_DATA_PTR "8") +set(CMAKE_CXX_COMPILER_ABI "") +set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN") +set(CMAKE_CXX_LIBRARY_ARCHITECTURE "") + +if(CMAKE_CXX_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_CXX_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") +endif() + +if(CMAKE_CXX_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "") +endif() + +set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "Note: including file: ") +if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + + +set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "") +set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "") +set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "") +set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeDetermineCompilerABI_C.bin b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeDetermineCompilerABI_C.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6f8cd7a337d199bf1e3fabd1d150c5579f8f0bb --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeDetermineCompilerABI_C.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2ea5a3b9e5a82c91fc63f50856bb4f7ca5aae16ac8487e33db8c9e949637cc +size 51200 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeDetermineCompilerABI_CXX.bin b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeDetermineCompilerABI_CXX.bin new file mode 100644 index 0000000000000000000000000000000000000000..8181d1e33c888ec1825294f71667edc0174f5873 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeDetermineCompilerABI_CXX.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c63ae1c9c954d16127d3338ac5cbaa118370cc877be5fc72aa56a0ee3a8417cf +size 51200 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeRCCompiler.cmake b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeRCCompiler.cmake new file mode 100644 index 0000000000000000000000000000000000000000..85d7bff66ddd4c0210fed983a4c64285c3a6f7e4 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeRCCompiler.cmake @@ -0,0 +1,6 @@ +set(CMAKE_RC_COMPILER "C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x86/rc.exe") +set(CMAKE_RC_COMPILER_ARG1 "") +set(CMAKE_RC_COMPILER_LOADED 1) +set(CMAKE_RC_SOURCE_FILE_EXTENSIONS rc;RC) +set(CMAKE_RC_OUTPUT_EXTENSION .res) +set(CMAKE_RC_COMPILER_ENV_VAR "RC") diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeSystem.cmake b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeSystem.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e1deade366d7d6525a55da772a9e60913d4b7005 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CMakeSystem.cmake @@ -0,0 +1,15 @@ +set(CMAKE_HOST_SYSTEM "Windows-10.0.22621") +set(CMAKE_HOST_SYSTEM_NAME "Windows") +set(CMAKE_HOST_SYSTEM_VERSION "10.0.22621") +set(CMAKE_HOST_SYSTEM_PROCESSOR "AMD64") + + + +set(CMAKE_SYSTEM "Windows-10.0.22621") +set(CMAKE_SYSTEM_NAME "Windows") +set(CMAKE_SYSTEM_VERSION "10.0.22621") +set(CMAKE_SYSTEM_PROCESSOR "AMD64") + +set(CMAKE_CROSSCOMPILING "FALSE") + +set(CMAKE_SYSTEM_LOADED 1) diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.c b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.c new file mode 100644 index 0000000000000000000000000000000000000000..88155ff20cb6a788920d888b832d81cfa22cdd8d --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.c @@ -0,0 +1,866 @@ +#ifdef __cplusplus +# error "A C++ compiler has been selected for C." +#endif + +#if defined(__18CXX) +# define ID_VOID_MAIN +#endif +#if defined(__CLASSIC_C__) +/* cv-qualifiers did not exist in K&R C */ +# define const +# define volatile +#endif + +#if !defined(__has_include) +/* If the compiler does not have __has_include, pretend the answer is + always no. */ +# define __has_include(x) 0 +#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# if defined(__GNUC__) +# define SIMULATE_ID "GNU" +# endif + /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later, + except that a few beta releases use the old format with V=2021. */ +# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111 +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# else +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE) + /* The third version component from --version is an update index, + but no macro is provided for it. */ +# define COMPILER_VERSION_PATCH DEC(0) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +# elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER) +# define COMPILER_ID "IntelLLVM" +#if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +#endif +#if defined(__GNUC__) +# define SIMULATE_ID "GNU" +#endif +/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and + * later. Look for 6 digit vs. 8 digit version number to decide encoding. + * VVVV is no smaller than the current year when a version is released. + */ +#if __INTEL_LLVM_COMPILER < 1000000L +# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10) +#else +# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000) +# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100) +#endif +#if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +#endif +#if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +#elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +#endif +#if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +#endif +#if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +#endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) +# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_C) +# define COMPILER_ID "SunPro" +# if __SUNPRO_C >= 0x5100 + /* __SUNPRO_C = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) +# endif + +#elif defined(__HP_cc) +# define COMPILER_ID "HP" + /* __HP_cc = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__HP_cc % 100) + +#elif defined(__DECC) +# define COMPILER_ID "Compaq" + /* __DECC_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECC_VER % 10000) + +#elif defined(__IBMC__) && defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__open_xl__) && defined(__clang__) +# define COMPILER_ID "IBMClang" +# define COMPILER_VERSION_MAJOR DEC(__open_xl_version__) +# define COMPILER_VERSION_MINOR DEC(__open_xl_release__) +# define COMPILER_VERSION_PATCH DEC(__open_xl_modification__) +# define COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__) + + +#elif defined(__ibmxl__) && defined(__clang__) +# define COMPILER_ID "XLClang" +# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__) +# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__) +# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__) +# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__) + + +#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800 +# define COMPILER_ID "XL" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__NVCOMPILER) +# define COMPILER_ID "NVHPC" +# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__) +# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__) +# if defined(__NVCOMPILER_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__) +# endif + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__CLANG_FUJITSU) +# define COMPILER_ID "FujitsuClang" +# define COMPILER_VERSION_MAJOR DEC(__FCC_major__) +# define COMPILER_VERSION_MINOR DEC(__FCC_minor__) +# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__) +# define COMPILER_VERSION_INTERNAL_STR __clang_version__ + + +#elif defined(__FUJITSU) +# define COMPILER_ID "Fujitsu" +# if defined(__FCC_version__) +# define COMPILER_VERSION __FCC_version__ +# elif defined(__FCC_major__) +# define COMPILER_VERSION_MAJOR DEC(__FCC_major__) +# define COMPILER_VERSION_MINOR DEC(__FCC_minor__) +# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__) +# endif +# if defined(__fcc_version) +# define COMPILER_VERSION_INTERNAL DEC(__fcc_version) +# elif defined(__FCC_VERSION) +# define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION) +# endif + + +#elif defined(__ghs__) +# define COMPILER_ID "GHS" +/* __GHS_VERSION_NUMBER = VVVVRP */ +# ifdef __GHS_VERSION_NUMBER +# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100) +# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10) +# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10) +# endif + +#elif defined(__TASKING__) +# define COMPILER_ID "Tasking" + # define COMPILER_VERSION_MAJOR DEC(__VERSION__/1000) + # define COMPILER_VERSION_MINOR DEC(__VERSION__ % 100) +# define COMPILER_VERSION_INTERNAL DEC(__VERSION__) + +#elif defined(__TINYC__) +# define COMPILER_ID "TinyCC" + +#elif defined(__BCC__) +# define COMPILER_ID "Bruce" + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__ARMCC_VERSION) && !defined(__clang__) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__) + +#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION) +# define COMPILER_ID "ARMClang" + # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 10000) +# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION) + +#elif defined(__clang__) +# define COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__)) +# define COMPILER_ID "LCC" +# define COMPILER_VERSION_MAJOR DEC(__LCC__ / 100) +# define COMPILER_VERSION_MINOR DEC(__LCC__ % 100) +# if defined(__LCC_MINOR__) +# define COMPILER_VERSION_PATCH DEC(__LCC_MINOR__) +# endif +# if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define SIMULATE_ID "GNU" +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +# if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif +# endif + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(_ADI_COMPILER) +# define COMPILER_ID "ADSP" +#if defined(__VERSIONNUM__) + /* __VERSIONNUM__ = 0xVVRRPPTT */ +# define COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF) +# define COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF) +# define COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF) +# define COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" +# if defined(__VER__) && defined(__ICCARM__) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000) +# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000) +# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__)) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100) +# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) +# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# endif + +#elif defined(__SDCC_VERSION_MAJOR) || defined(SDCC) +# define COMPILER_ID "SDCC" +# if defined(__SDCC_VERSION_MAJOR) +# define COMPILER_VERSION_MAJOR DEC(__SDCC_VERSION_MAJOR) +# define COMPILER_VERSION_MINOR DEC(__SDCC_VERSION_MINOR) +# define COMPILER_VERSION_PATCH DEC(__SDCC_VERSION_PATCH) +# else + /* SDCC = VRP */ +# define COMPILER_VERSION_MAJOR DEC(SDCC/100) +# define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10) +# define COMPILER_VERSION_PATCH DEC(SDCC % 10) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__MSYS__) +# define PLATFORM_ID "MSYS" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# elif defined(__VXWORKS__) +# define PLATFORM_ID "VxWorks" + +# else /* unknown platform */ +# define PLATFORM_ID +# endif + +#elif defined(__INTEGRITY) +# if defined(INT_178B) +# define PLATFORM_ID "Integrity178" + +# else /* regular Integrity */ +# define PLATFORM_ID "Integrity" +# endif + +# elif defined(_ADI_COMPILER) +# define PLATFORM_ID "ADSP" + +#else /* unknown platform */ +# define PLATFORM_ID + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_ARM64EC) +# define ARCHITECTURE_ID "ARM64EC" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM64) +# define ARCHITECTURE_ID "ARM64" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# if defined(__ICCARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__ICCRX__) +# define ARCHITECTURE_ID "RX" + +# elif defined(__ICCRH850__) +# define ARCHITECTURE_ID "RH850" + +# elif defined(__ICCRL78__) +# define ARCHITECTURE_ID "RL78" + +# elif defined(__ICCRISCV__) +# define ARCHITECTURE_ID "RISCV" + +# elif defined(__ICCAVR__) +# define ARCHITECTURE_ID "AVR" + +# elif defined(__ICC430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__ICCV850__) +# define ARCHITECTURE_ID "V850" + +# elif defined(__ICC8051__) +# define ARCHITECTURE_ID "8051" + +# elif defined(__ICCSTM8__) +# define ARCHITECTURE_ID "STM8" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__ghs__) +# if defined(__PPC64__) +# define ARCHITECTURE_ID "PPC64" + +# elif defined(__ppc__) +# define ARCHITECTURE_ID "PPC" + +# elif defined(__ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__x86_64__) +# define ARCHITECTURE_ID "x64" + +# elif defined(__i386__) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__TI_COMPILER_VERSION__) +# if defined(__TI_ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__MSP430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__TMS320C28XX__) +# define ARCHITECTURE_ID "TMS320C28x" + +# elif defined(__TMS320C6X__) || defined(_TMS320C6X) +# define ARCHITECTURE_ID "TMS320C6x" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +# elif defined(__ADSPSHARC__) +# define ARCHITECTURE_ID "SHARC" + +# elif defined(__ADSPBLACKFIN__) +# define ARCHITECTURE_ID "Blackfin" + +#elif defined(__TASKING__) + +# if defined(__CTC__) || defined(__CPTC__) +# define ARCHITECTURE_ID "TriCore" + +# elif defined(__CMCS__) +# define ARCHITECTURE_ID "MCS" + +# elif defined(__CARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__CARC__) +# define ARCHITECTURE_ID "ARC" + +# elif defined(__C51__) +# define ARCHITECTURE_ID "8051" + +# elif defined(__CPCP__) +# define ARCHITECTURE_ID "PCP" + +# else +# define ARCHITECTURE_ID "" +# endif + +#else +# define ARCHITECTURE_ID +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number. */ +#ifdef COMPILER_VERSION +char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]"; + +/* Construct a string literal encoding the version number components. */ +#elif defined(COMPILER_VERSION_MAJOR) +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the internal version number. */ +#ifdef COMPILER_VERSION_INTERNAL +char const info_version_internal[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_', + 'i','n','t','e','r','n','a','l','[', + COMPILER_VERSION_INTERNAL,']','\0'}; +#elif defined(COMPILER_VERSION_INTERNAL_STR) +char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]"; +#endif + +/* Construct a string literal encoding the version number components. */ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + +#if !defined(__STDC__) && !defined(__clang__) +# if defined(_MSC_VER) || defined(__ibmxl__) || defined(__IBMC__) +# define C_VERSION "90" +# else +# define C_VERSION +# endif +#elif __STDC_VERSION__ > 201710L +# define C_VERSION "23" +#elif __STDC_VERSION__ >= 201710L +# define C_VERSION "17" +#elif __STDC_VERSION__ >= 201000L +# define C_VERSION "11" +#elif __STDC_VERSION__ >= 199901L +# define C_VERSION "99" +#else +# define C_VERSION "90" +#endif +const char* info_language_standard_default = + "INFO" ":" "standard_default[" C_VERSION "]"; + +const char* info_language_extensions_default = "INFO" ":" "extensions_default[" +#if (defined(__clang__) || defined(__GNUC__) || defined(__xlC__) || \ + defined(__TI_COMPILER_VERSION__)) && \ + !defined(__STRICT_ANSI__) + "ON" +#else + "OFF" +#endif +"]"; + +/*--------------------------------------------------------------------------*/ + +#ifdef ID_VOID_MAIN +void main() {} +#else +# if defined(__CLASSIC_C__) +int main(argc, argv) int argc; char *argv[]; +# else +int main(int argc, char* argv[]) +# endif +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + require += info_arch[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef COMPILER_VERSION_INTERNAL + require += info_version_internal[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) + require += info_cray[argc]; +#endif + require += info_language_standard_default[argc]; + require += info_language_extensions_default[argc]; + (void)argv; + return require; +} +#endif diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.exe b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.exe new file mode 100644 index 0000000000000000000000000000000000000000..b5d65e94f06285d735b63b485e2c0e853449c873 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.exe differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.obj b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.obj new file mode 100644 index 0000000000000000000000000000000000000000..2cff4c09865ecc4657b24312e9d7f64952a747ab Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.obj differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.cpp b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.cpp new file mode 100644 index 0000000000000000000000000000000000000000..746b1672e6408e180134871066e63c54fb6e474d --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.cpp @@ -0,0 +1,855 @@ +/* This source file must have a .cpp extension so that all C++ compilers + recognize the extension without flags. Borland does not know .cxx for + example. */ +#ifndef __cplusplus +# error "A C compiler has been selected for C++." +#endif + +#if !defined(__has_include) +/* If the compiler does not have __has_include, pretend the answer is + always no. */ +# define __has_include(x) 0 +#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__COMO__) +# define COMPILER_ID "Comeau" + /* __COMO_VERSION__ = VRR */ +# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100) +# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100) + +#elif defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# if defined(__GNUC__) +# define SIMULATE_ID "GNU" +# endif + /* __INTEL_COMPILER = VRP prior to 2021, and then VVVV for 2021 and later, + except that a few beta releases use the old format with V=2021. */ +# if __INTEL_COMPILER < 2021 || __INTEL_COMPILER == 202110 || __INTEL_COMPILER == 202111 +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# else +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER_UPDATE) + /* The third version component from --version is an update index, + but no macro is provided for it. */ +# define COMPILER_VERSION_PATCH DEC(0) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +# elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif (defined(__clang__) && defined(__INTEL_CLANG_COMPILER)) || defined(__INTEL_LLVM_COMPILER) +# define COMPILER_ID "IntelLLVM" +#if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +#endif +#if defined(__GNUC__) +# define SIMULATE_ID "GNU" +#endif +/* __INTEL_LLVM_COMPILER = VVVVRP prior to 2021.2.0, VVVVRRPP for 2021.2.0 and + * later. Look for 6 digit vs. 8 digit version number to decide encoding. + * VVVV is no smaller than the current year when a version is released. + */ +#if __INTEL_LLVM_COMPILER < 1000000L +# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 10) +#else +# define COMPILER_VERSION_MAJOR DEC(__INTEL_LLVM_COMPILER/10000) +# define COMPILER_VERSION_MINOR DEC(__INTEL_LLVM_COMPILER/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__INTEL_LLVM_COMPILER % 100) +#endif +#if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +#endif +#if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +#elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +#endif +#if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +#endif +#if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +#endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) +# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_CC) +# define COMPILER_ID "SunPro" +# if __SUNPRO_CC >= 0x5100 + /* __SUNPRO_CC = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# endif + +#elif defined(__HP_aCC) +# define COMPILER_ID "HP" + /* __HP_aCC = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100) + +#elif defined(__DECCXX) +# define COMPILER_ID "Compaq" + /* __DECCXX_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000) + +#elif defined(__IBMCPP__) && defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__open_xl__) && defined(__clang__) +# define COMPILER_ID "IBMClang" +# define COMPILER_VERSION_MAJOR DEC(__open_xl_version__) +# define COMPILER_VERSION_MINOR DEC(__open_xl_release__) +# define COMPILER_VERSION_PATCH DEC(__open_xl_modification__) +# define COMPILER_VERSION_TWEAK DEC(__open_xl_ptf_fix_level__) + + +#elif defined(__ibmxl__) && defined(__clang__) +# define COMPILER_ID "XLClang" +# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__) +# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__) +# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__) +# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__) + + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800 +# define COMPILER_ID "XL" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__NVCOMPILER) +# define COMPILER_ID "NVHPC" +# define COMPILER_VERSION_MAJOR DEC(__NVCOMPILER_MAJOR__) +# define COMPILER_VERSION_MINOR DEC(__NVCOMPILER_MINOR__) +# if defined(__NVCOMPILER_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__NVCOMPILER_PATCHLEVEL__) +# endif + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__CLANG_FUJITSU) +# define COMPILER_ID "FujitsuClang" +# define COMPILER_VERSION_MAJOR DEC(__FCC_major__) +# define COMPILER_VERSION_MINOR DEC(__FCC_minor__) +# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__) +# define COMPILER_VERSION_INTERNAL_STR __clang_version__ + + +#elif defined(__FUJITSU) +# define COMPILER_ID "Fujitsu" +# if defined(__FCC_version__) +# define COMPILER_VERSION __FCC_version__ +# elif defined(__FCC_major__) +# define COMPILER_VERSION_MAJOR DEC(__FCC_major__) +# define COMPILER_VERSION_MINOR DEC(__FCC_minor__) +# define COMPILER_VERSION_PATCH DEC(__FCC_patchlevel__) +# endif +# if defined(__fcc_version) +# define COMPILER_VERSION_INTERNAL DEC(__fcc_version) +# elif defined(__FCC_VERSION) +# define COMPILER_VERSION_INTERNAL DEC(__FCC_VERSION) +# endif + + +#elif defined(__ghs__) +# define COMPILER_ID "GHS" +/* __GHS_VERSION_NUMBER = VVVVRP */ +# ifdef __GHS_VERSION_NUMBER +# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100) +# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10) +# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10) +# endif + +#elif defined(__TASKING__) +# define COMPILER_ID "Tasking" + # define COMPILER_VERSION_MAJOR DEC(__VERSION__/1000) + # define COMPILER_VERSION_MINOR DEC(__VERSION__ % 100) +# define COMPILER_VERSION_INTERNAL DEC(__VERSION__) + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__ARMCC_VERSION) && !defined(__clang__) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__) + +#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION) +# define COMPILER_ID "ARMClang" + # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 10000) +# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION) + +#elif defined(__clang__) +# define COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__LCC__) && (defined(__GNUC__) || defined(__GNUG__) || defined(__MCST__)) +# define COMPILER_ID "LCC" +# define COMPILER_VERSION_MAJOR DEC(__LCC__ / 100) +# define COMPILER_VERSION_MINOR DEC(__LCC__ % 100) +# if defined(__LCC_MINOR__) +# define COMPILER_VERSION_PATCH DEC(__LCC_MINOR__) +# endif +# if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define SIMULATE_ID "GNU" +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +# if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif +# endif + +#elif defined(__GNUC__) || defined(__GNUG__) +# define COMPILER_ID "GNU" +# if defined(__GNUC__) +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# else +# define COMPILER_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(_ADI_COMPILER) +# define COMPILER_ID "ADSP" +#if defined(__VERSIONNUM__) + /* __VERSIONNUM__ = 0xVVRRPPTT */ +# define COMPILER_VERSION_MAJOR DEC(__VERSIONNUM__ >> 24 & 0xFF) +# define COMPILER_VERSION_MINOR DEC(__VERSIONNUM__ >> 16 & 0xFF) +# define COMPILER_VERSION_PATCH DEC(__VERSIONNUM__ >> 8 & 0xFF) +# define COMPILER_VERSION_TWEAK DEC(__VERSIONNUM__ & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" +# if defined(__VER__) && defined(__ICCARM__) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000) +# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000) +# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__) || defined(__ICCSTM8__)) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100) +# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) +# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__MSYS__) +# define PLATFORM_ID "MSYS" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# elif defined(__VXWORKS__) +# define PLATFORM_ID "VxWorks" + +# else /* unknown platform */ +# define PLATFORM_ID +# endif + +#elif defined(__INTEGRITY) +# if defined(INT_178B) +# define PLATFORM_ID "Integrity178" + +# else /* regular Integrity */ +# define PLATFORM_ID "Integrity" +# endif + +# elif defined(_ADI_COMPILER) +# define PLATFORM_ID "ADSP" + +#else /* unknown platform */ +# define PLATFORM_ID + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_ARM64EC) +# define ARCHITECTURE_ID "ARM64EC" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM64) +# define ARCHITECTURE_ID "ARM64" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# if defined(__ICCARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__ICCRX__) +# define ARCHITECTURE_ID "RX" + +# elif defined(__ICCRH850__) +# define ARCHITECTURE_ID "RH850" + +# elif defined(__ICCRL78__) +# define ARCHITECTURE_ID "RL78" + +# elif defined(__ICCRISCV__) +# define ARCHITECTURE_ID "RISCV" + +# elif defined(__ICCAVR__) +# define ARCHITECTURE_ID "AVR" + +# elif defined(__ICC430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__ICCV850__) +# define ARCHITECTURE_ID "V850" + +# elif defined(__ICC8051__) +# define ARCHITECTURE_ID "8051" + +# elif defined(__ICCSTM8__) +# define ARCHITECTURE_ID "STM8" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__ghs__) +# if defined(__PPC64__) +# define ARCHITECTURE_ID "PPC64" + +# elif defined(__ppc__) +# define ARCHITECTURE_ID "PPC" + +# elif defined(__ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__x86_64__) +# define ARCHITECTURE_ID "x64" + +# elif defined(__i386__) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__TI_COMPILER_VERSION__) +# if defined(__TI_ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__MSP430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__TMS320C28XX__) +# define ARCHITECTURE_ID "TMS320C28x" + +# elif defined(__TMS320C6X__) || defined(_TMS320C6X) +# define ARCHITECTURE_ID "TMS320C6x" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +# elif defined(__ADSPSHARC__) +# define ARCHITECTURE_ID "SHARC" + +# elif defined(__ADSPBLACKFIN__) +# define ARCHITECTURE_ID "Blackfin" + +#elif defined(__TASKING__) + +# if defined(__CTC__) || defined(__CPTC__) +# define ARCHITECTURE_ID "TriCore" + +# elif defined(__CMCS__) +# define ARCHITECTURE_ID "MCS" + +# elif defined(__CARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__CARC__) +# define ARCHITECTURE_ID "ARC" + +# elif defined(__C51__) +# define ARCHITECTURE_ID "8051" + +# elif defined(__CPCP__) +# define ARCHITECTURE_ID "PCP" + +# else +# define ARCHITECTURE_ID "" +# endif + +#else +# define ARCHITECTURE_ID +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number. */ +#ifdef COMPILER_VERSION +char const* info_version = "INFO" ":" "compiler_version[" COMPILER_VERSION "]"; + +/* Construct a string literal encoding the version number components. */ +#elif defined(COMPILER_VERSION_MAJOR) +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the internal version number. */ +#ifdef COMPILER_VERSION_INTERNAL +char const info_version_internal[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_', + 'i','n','t','e','r','n','a','l','[', + COMPILER_VERSION_INTERNAL,']','\0'}; +#elif defined(COMPILER_VERSION_INTERNAL_STR) +char const* info_version_internal = "INFO" ":" "compiler_version_internal[" COMPILER_VERSION_INTERNAL_STR "]"; +#endif + +/* Construct a string literal encoding the version number components. */ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + +#if defined(__INTEL_COMPILER) && defined(_MSVC_LANG) && _MSVC_LANG < 201403L +# if defined(__INTEL_CXX11_MODE__) +# if defined(__cpp_aggregate_nsdmi) +# define CXX_STD 201402L +# else +# define CXX_STD 201103L +# endif +# else +# define CXX_STD 199711L +# endif +#elif defined(_MSC_VER) && defined(_MSVC_LANG) +# define CXX_STD _MSVC_LANG +#else +# define CXX_STD __cplusplus +#endif + +const char* info_language_standard_default = "INFO" ":" "standard_default[" +#if CXX_STD > 202002L + "23" +#elif CXX_STD > 201703L + "20" +#elif CXX_STD >= 201703L + "17" +#elif CXX_STD >= 201402L + "14" +#elif CXX_STD >= 201103L + "11" +#else + "98" +#endif +"]"; + +const char* info_language_extensions_default = "INFO" ":" "extensions_default[" +#if (defined(__clang__) || defined(__GNUC__) || defined(__xlC__) || \ + defined(__TI_COMPILER_VERSION__)) && \ + !defined(__STRICT_ANSI__) + "ON" +#else + "OFF" +#endif +"]"; + +/*--------------------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + require += info_arch[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef COMPILER_VERSION_INTERNAL + require += info_version_internal[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXT_COMPUTE_LINUX_TARGET) + require += info_cray[argc]; +#endif + require += info_language_standard_default[argc]; + require += info_language_extensions_default[argc]; + (void)argv; + return require; +} diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.exe b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.exe new file mode 100644 index 0000000000000000000000000000000000000000..ee985886546c1fb506963848bc5cac344e69faa2 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.exe differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.obj b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.obj new file mode 100644 index 0000000000000000000000000000000000000000..bce32bcc73b2f18642e4cf5e8f44822c16c9fe64 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.obj differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeConfigureLog.yaml b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeConfigureLog.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c16ef9281985658f007da8e0c1e3fb7f60633e9d --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeConfigureLog.yaml @@ -0,0 +1,250 @@ + +--- +events: + - + kind: "message-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineSystem.cmake:204 (message)" + - "CMakeLists.txt:2 (project)" + message: | + The system is: Windows - 10.0.22621 - AMD64 + - + kind: "message-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerId.cmake:17 (message)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCCompiler.cmake:123 (CMAKE_DETERMINE_COMPILER_ID)" + - "CMakeLists.txt:2 (project)" + message: | + Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded. + Compiler: C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/cl.exe + Build flags: + Id flags: + + The output was: + 0 + Microsoft (R) C/C++ Optimizing Compiler Version 19.29.30151 for x64 + Copyright (C) Microsoft Corporation. All rights reserved. + + CMakeCCompilerId.c + Microsoft (R) Incremental Linker Version 14.29.30151.0 + Copyright (C) Microsoft Corporation. All rights reserved. + + /out:CMakeCCompilerId.exe + CMakeCCompilerId.obj + + + Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "CMakeCCompilerId.exe" + + Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "CMakeCCompilerId.obj" + + The C compiler identification is MSVC, found in: + D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdC/CMakeCCompilerId.exe + + - + kind: "message-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerId.cmake:1153 (message)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerId.cmake:221 (CMAKE_DETERMINE_MSVC_SHOWINCLUDES_PREFIX)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCCompiler.cmake:123 (CMAKE_DETERMINE_COMPILER_ID)" + - "CMakeLists.txt:2 (project)" + message: | + Detecting C compiler /showIncludes prefix: + main.c + Note: including file: D:\\Projects\\Replit 3b\\replit-3B-inference\\ctransformers\\_skbuild\\win-amd64-3.11\\cmake-build\\CMakeFiles\\ShowIncludes\\foo.h + + Found prefix "Note: including file: " + - + kind: "message-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerId.cmake:17 (message)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerId.cmake:64 (__determine_compiler_id_test)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCXXCompiler.cmake:126 (CMAKE_DETERMINE_COMPILER_ID)" + - "CMakeLists.txt:2 (project)" + message: | + Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded. + Compiler: C:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Tools/MSVC/14.29.30133/bin/Hostx86/x64/cl.exe + Build flags: + Id flags: + + The output was: + 0 + Microsoft (R) C/C++ Optimizing Compiler Version 19.29.30151 for x64 + Copyright (C) Microsoft Corporation. All rights reserved. + + CMakeCXXCompilerId.cpp + Microsoft (R) Incremental Linker Version 14.29.30151.0 + Copyright (C) Microsoft Corporation. All rights reserved. + + /out:CMakeCXXCompilerId.exe + CMakeCXXCompilerId.obj + + + Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "CMakeCXXCompilerId.exe" + + Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "CMakeCXXCompilerId.obj" + + The CXX compiler identification is MSVC, found in: + D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/3.26.4/CompilerIdCXX/CMakeCXXCompilerId.exe + + - + kind: "message-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerId.cmake:1153 (message)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerId.cmake:221 (CMAKE_DETERMINE_MSVC_SHOWINCLUDES_PREFIX)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCXXCompiler.cmake:126 (CMAKE_DETERMINE_COMPILER_ID)" + - "CMakeLists.txt:2 (project)" + message: | + Detecting CXX compiler /showIncludes prefix: + main.c + Note: including file: D:\\Projects\\Replit 3b\\replit-3B-inference\\ctransformers\\_skbuild\\win-amd64-3.11\\cmake-build\\CMakeFiles\\ShowIncludes\\foo.h + + Found prefix "Note: including file: " + - + kind: "try_compile-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerABI.cmake:57 (try_compile)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeTestCCompiler.cmake:26 (CMAKE_DETERMINE_COMPILER_ABI)" + - "CMakeLists.txt:2 (project)" + checks: + - "Detecting C compiler ABI info" + directories: + source: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-yuzytc" + binary: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-yuzytc" + cmakeVariables: + CMAKE_C_FLAGS: "/DWIN32 /D_WINDOWS" + CMAKE_C_FLAGS_DEBUG: "/Zi /Ob0 /Od /RTC1" + CMAKE_EXE_LINKER_FLAGS: "/machine:x64" + CMAKE_MODULE_PATH: "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/skbuild/resources/cmake" + buildResult: + variable: "CMAKE_C_ABI_COMPILED" + cached: true + stdout: | + Change Dir: D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-yuzytc + + Run Build Command(s):C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/ninja/data/bin/ninja -v cmTC_26882 && [1/2] C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\cl.exe /nologo /DWIN32 /D_WINDOWS /Zi /Ob0 /Od /RTC1 -MDd /showIncludes /FoCMakeFiles\\cmTC_26882.dir\\CMakeCCompilerABI.c.obj /FdCMakeFiles\\cmTC_26882.dir\\ /FS -c C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\share\\cmake-3.26\\Modules\\CMakeCCompilerABI.c + [2/2] cmd.exe /C "cd . && C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\bin\\cmake.exe -E vs_link_exe --intdir=CMakeFiles\\cmTC_26882.dir --rc=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\rc.exe --mt=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\mt.exe --manifests -- C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\link.exe /nologo CMakeFiles\\cmTC_26882.dir\\CMakeCCompilerABI.c.obj /out:cmTC_26882.exe /implib:cmTC_26882.lib /pdb:cmTC_26882.pdb /version:0.0 /machine:x64 /debug /INCREMENTAL /subsystem:console kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib && cd ." + + exitCode: 0 + - + kind: "try_compile-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeDetermineCompilerABI.cmake:57 (try_compile)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CMakeTestCXXCompiler.cmake:26 (CMAKE_DETERMINE_COMPILER_ABI)" + - "CMakeLists.txt:2 (project)" + checks: + - "Detecting CXX compiler ABI info" + directories: + source: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-rv69tk" + binary: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-rv69tk" + cmakeVariables: + CMAKE_CXX_FLAGS: "/DWIN32 /D_WINDOWS /GR /EHsc" + CMAKE_CXX_FLAGS_DEBUG: "/Zi /Ob0 /Od /RTC1" + CMAKE_EXE_LINKER_FLAGS: "/machine:x64" + CMAKE_MODULE_PATH: "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/skbuild/resources/cmake" + buildResult: + variable: "CMAKE_CXX_ABI_COMPILED" + cached: true + stdout: | + Change Dir: D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-rv69tk + + Run Build Command(s):C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/ninja/data/bin/ninja -v cmTC_eec92 && [1/2] C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\cl.exe /nologo /TP /DWIN32 /D_WINDOWS /GR /EHsc /Zi /Ob0 /Od /RTC1 -MDd /showIncludes /FoCMakeFiles\\cmTC_eec92.dir\\CMakeCXXCompilerABI.cpp.obj /FdCMakeFiles\\cmTC_eec92.dir\\ /FS -c C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\share\\cmake-3.26\\Modules\\CMakeCXXCompilerABI.cpp + [2/2] cmd.exe /C "cd . && C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\bin\\cmake.exe -E vs_link_exe --intdir=CMakeFiles\\cmTC_eec92.dir --rc=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\rc.exe --mt=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\mt.exe --manifests -- C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\link.exe /nologo CMakeFiles\\cmTC_eec92.dir\\CMakeCXXCompilerABI.cpp.obj /out:cmTC_eec92.exe /implib:cmTC_eec92.lib /pdb:cmTC_eec92.pdb /version:0.0 /machine:x64 /debug /INCREMENTAL /subsystem:console kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib && cd ." + + exitCode: 0 + - + kind: "try_compile-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/Internal/CheckSourceCompiles.cmake:101 (try_compile)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CheckCSourceCompiles.cmake:76 (cmake_check_source_compiles)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/FindThreads.cmake:97 (CHECK_C_SOURCE_COMPILES)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/FindThreads.cmake:163 (_threads_check_libc)" + - "CMakeLists.txt:26 (find_package)" + checks: + - "Performing Test CMAKE_HAVE_LIBC_PTHREAD" + directories: + source: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-hmwnf3" + binary: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-hmwnf3" + cmakeVariables: + CMAKE_C_FLAGS: "/DWIN32 /D_WINDOWS" + CMAKE_C_FLAGS_DEBUG: "/Zi /Ob0 /Od /RTC1" + CMAKE_EXE_LINKER_FLAGS: "/machine:x64" + CMAKE_MODULE_PATH: "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/skbuild/resources/cmake" + buildResult: + variable: "CMAKE_HAVE_LIBC_PTHREAD" + cached: true + stdout: | + Change Dir: D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-hmwnf3 + + Run Build Command(s):C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/ninja/data/bin/ninja -v cmTC_90879 && [1/2] C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\cl.exe /nologo -DCMAKE_HAVE_LIBC_PTHREAD /DWIN32 /D_WINDOWS /Zi /Ob0 /Od /RTC1 -std:c11 -MDd /showIncludes /FoCMakeFiles\\cmTC_90879.dir\\src.c.obj /FdCMakeFiles\\cmTC_90879.dir\\ /FS -c "D:\\Projects\\Replit 3b\\replit-3B-inference\\ctransformers\\_skbuild\\win-amd64-3.11\\cmake-build\\CMakeFiles\\CMakeScratch\\TryCompile-hmwnf3\\src.c" + FAILED: CMakeFiles/cmTC_90879.dir/src.c.obj + C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\cl.exe /nologo -DCMAKE_HAVE_LIBC_PTHREAD /DWIN32 /D_WINDOWS /Zi /Ob0 /Od /RTC1 -std:c11 -MDd /showIncludes /FoCMakeFiles\\cmTC_90879.dir\\src.c.obj /FdCMakeFiles\\cmTC_90879.dir\\ /FS -c "D:\\Projects\\Replit 3b\\replit-3B-inference\\ctransformers\\_skbuild\\win-amd64-3.11\\cmake-build\\CMakeFiles\\CMakeScratch\\TryCompile-hmwnf3\\src.c" + D:\\Projects\\Replit 3b\\replit-3B-inference\\ctransformers\\_skbuild\\win-amd64-3.11\\cmake-build\\CMakeFiles\\CMakeScratch\\TryCompile-hmwnf3\\src.c(1): fatal error C1083: Cannot open include file: 'pthread.h': No such file or directory + ninja: build stopped: subcommand failed. + + exitCode: 1 + - + kind: "try_compile-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CheckLibraryExists.cmake:71 (try_compile)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/FindThreads.cmake:112 (CHECK_LIBRARY_EXISTS)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/FindThreads.cmake:175 (_threads_check_lib)" + - "CMakeLists.txt:26 (find_package)" + checks: + - "Looking for pthread_create in pthreads" + directories: + source: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-mp76nj" + binary: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-mp76nj" + cmakeVariables: + CMAKE_C_FLAGS: "/DWIN32 /D_WINDOWS" + CMAKE_C_FLAGS_DEBUG: "/Zi /Ob0 /Od /RTC1" + CMAKE_EXE_LINKER_FLAGS: "/machine:x64" + CMAKE_MODULE_PATH: "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/skbuild/resources/cmake" + buildResult: + variable: "CMAKE_HAVE_PTHREADS_CREATE" + cached: true + stdout: | + Change Dir: D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-mp76nj + + Run Build Command(s):C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/ninja/data/bin/ninja -v cmTC_aac20 && [1/2] C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\cl.exe /nologo /DWIN32 /D_WINDOWS -DCHECK_FUNCTION_EXISTS=pthread_create /Zi /Ob0 /Od /RTC1 -std:c11 -MDd /showIncludes /FoCMakeFiles\\cmTC_aac20.dir\\CheckFunctionExists.c.obj /FdCMakeFiles\\cmTC_aac20.dir\\ /FS -c "D:\\Projects\\Replit 3b\\replit-3B-inference\\ctransformers\\_skbuild\\win-amd64-3.11\\cmake-build\\CMakeFiles\\CMakeScratch\\TryCompile-mp76nj\\CheckFunctionExists.c" + [2/2] cmd.exe /C "cd . && C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\bin\\cmake.exe -E vs_link_exe --intdir=CMakeFiles\\cmTC_aac20.dir --rc=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\rc.exe --mt=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\mt.exe --manifests -- C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\link.exe /nologo CMakeFiles\\cmTC_aac20.dir\\CheckFunctionExists.c.obj /out:cmTC_aac20.exe /implib:cmTC_aac20.lib /pdb:cmTC_aac20.pdb /version:0.0 /machine:x64 /debug /INCREMENTAL /subsystem:console pthreads.lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib && cd ." + FAILED: cmTC_aac20.exe + cmd.exe /C "cd . && C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\bin\\cmake.exe -E vs_link_exe --intdir=CMakeFiles\\cmTC_aac20.dir --rc=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\rc.exe --mt=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\mt.exe --manifests -- C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\link.exe /nologo CMakeFiles\\cmTC_aac20.dir\\CheckFunctionExists.c.obj /out:cmTC_aac20.exe /implib:cmTC_aac20.lib /pdb:cmTC_aac20.pdb /version:0.0 /machine:x64 /debug /INCREMENTAL /subsystem:console pthreads.lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib && cd ." + LINK Pass 1: command "C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\link.exe /nologo CMakeFiles\\cmTC_aac20.dir\\CheckFunctionExists.c.obj /out:cmTC_aac20.exe /implib:cmTC_aac20.lib /pdb:cmTC_aac20.pdb /version:0.0 /machine:x64 /debug /INCREMENTAL /subsystem:console pthreads.lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib /MANIFEST /MANIFESTFILE:CMakeFiles\\cmTC_aac20.dir/intermediate.manifest CMakeFiles\\cmTC_aac20.dir/manifest.res" failed (exit code 1104) with the following output: + LINK : fatal error LNK1104: cannot open file 'pthreads.lib'\x0d + ninja: build stopped: subcommand failed. + + exitCode: 1 + - + kind: "try_compile-v1" + backtrace: + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/CheckLibraryExists.cmake:71 (try_compile)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/FindThreads.cmake:112 (CHECK_LIBRARY_EXISTS)" + - "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/cmake/data/share/cmake-3.26/Modules/FindThreads.cmake:176 (_threads_check_lib)" + - "CMakeLists.txt:26 (find_package)" + checks: + - "Looking for pthread_create in pthread" + directories: + source: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-rn2tbx" + binary: "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-rn2tbx" + cmakeVariables: + CMAKE_C_FLAGS: "/DWIN32 /D_WINDOWS" + CMAKE_C_FLAGS_DEBUG: "/Zi /Ob0 /Od /RTC1" + CMAKE_EXE_LINKER_FLAGS: "/machine:x64" + CMAKE_MODULE_PATH: "C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/skbuild/resources/cmake" + buildResult: + variable: "CMAKE_HAVE_PTHREAD_CREATE" + cached: true + stdout: | + Change Dir: D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/CMakeScratch/TryCompile-rn2tbx + + Run Build Command(s):C:/Users/milin/AppData/Local/Temp/pip-build-env-nufd4ytl/overlay/Lib/site-packages/ninja/data/bin/ninja -v cmTC_32c50 && [1/2] C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\cl.exe /nologo /DWIN32 /D_WINDOWS -DCHECK_FUNCTION_EXISTS=pthread_create /Zi /Ob0 /Od /RTC1 -std:c11 -MDd /showIncludes /FoCMakeFiles\\cmTC_32c50.dir\\CheckFunctionExists.c.obj /FdCMakeFiles\\cmTC_32c50.dir\\ /FS -c "D:\\Projects\\Replit 3b\\replit-3B-inference\\ctransformers\\_skbuild\\win-amd64-3.11\\cmake-build\\CMakeFiles\\CMakeScratch\\TryCompile-rn2tbx\\CheckFunctionExists.c" + [2/2] cmd.exe /C "cd . && C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\bin\\cmake.exe -E vs_link_exe --intdir=CMakeFiles\\cmTC_32c50.dir --rc=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\rc.exe --mt=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\mt.exe --manifests -- C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\link.exe /nologo CMakeFiles\\cmTC_32c50.dir\\CheckFunctionExists.c.obj /out:cmTC_32c50.exe /implib:cmTC_32c50.lib /pdb:cmTC_32c50.pdb /version:0.0 /machine:x64 /debug /INCREMENTAL /subsystem:console pthread.lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib && cd ." + FAILED: cmTC_32c50.exe + cmd.exe /C "cd . && C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\bin\\cmake.exe -E vs_link_exe --intdir=CMakeFiles\\cmTC_32c50.dir --rc=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\rc.exe --mt=C:\\PROGRA~2\\WI3CF2~1\\10\\bin\\100190~1.0\\x86\\mt.exe --manifests -- C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\link.exe /nologo CMakeFiles\\cmTC_32c50.dir\\CheckFunctionExists.c.obj /out:cmTC_32c50.exe /implib:cmTC_32c50.lib /pdb:cmTC_32c50.pdb /version:0.0 /machine:x64 /debug /INCREMENTAL /subsystem:console pthread.lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib && cd ." + LINK Pass 1: command "C:\\PROGRA~2\\MICROS~3\\2019\\BUILDT~1\\VC\\Tools\\MSVC\\1429~1.301\\bin\\Hostx86\\x64\\link.exe /nologo CMakeFiles\\cmTC_32c50.dir\\CheckFunctionExists.c.obj /out:cmTC_32c50.exe /implib:cmTC_32c50.lib /pdb:cmTC_32c50.pdb /version:0.0 /machine:x64 /debug /INCREMENTAL /subsystem:console pthread.lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib /MANIFEST /MANIFESTFILE:CMakeFiles\\cmTC_32c50.dir/intermediate.manifest CMakeFiles\\cmTC_32c50.dir/manifest.res" failed (exit code 1104) with the following output: + LINK : fatal error LNK1104: cannot open file 'pthread.lib'\x0d + ninja: build stopped: subcommand failed. + + exitCode: 1 +... diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/foo.h b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/foo.h new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/foo.h @@ -0,0 +1 @@ + diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/main.c b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/main.c new file mode 100644 index 0000000000000000000000000000000000000000..cd3cbc1ff2bb98932520c792d7f0a82e1e877d80 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/main.c @@ -0,0 +1,2 @@ +#include "foo.h" +int main(){} diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/main.obj b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/main.obj new file mode 100644 index 0000000000000000000000000000000000000000..d3e5e3777c7c21876b7c2468fde7bdf6ba5fff59 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ShowIncludes/main.obj differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/TargetDirectories.txt b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/TargetDirectories.txt new file mode 100644 index 0000000000000000000000000000000000000000..2668884c94bb2ce3c81bb37742dbad5ba0d3af8a --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/TargetDirectories.txt @@ -0,0 +1,6 @@ +D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir +D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/edit_cache.dir +D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/rebuild_cache.dir +D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/list_install_components.dir +D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/install.dir +D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/install/local.dir diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/cmake.check_cache b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/cmake.check_cache new file mode 100644 index 0000000000000000000000000000000000000000..3dccd731726d7faa8b29d8d7dba3b981a53ca497 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/cmake.check_cache @@ -0,0 +1 @@ +# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/exports.def b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/exports.def new file mode 100644 index 0000000000000000000000000000000000000000..563b2d631bc9725428954af8eb87a9b78ca923de --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/exports.def @@ -0,0 +1,5063 @@ +EXPORTS + ??_R0?AU_Crt_new_delete@std@@@8 DATA + ??_R0?AV?$_Iosb@H@std@@@8 DATA + ??_R0?AV?$_Node_class@DV?$regex_traits@D@std@@@std@@@8 DATA + ??_R0?AV?$_Node_str@D@std@@@8 DATA + ??_R0?AV?$basic_filebuf@DU?$char_traits@D@std@@@std@@@8 DATA + ??_R0?AV?$basic_ifstream@DU?$char_traits@D@std@@@std@@@8 DATA + ??_R0?AV?$basic_ios@DU?$char_traits@D@std@@@std@@@8 DATA + ??_R0?AV?$basic_iostream@DU?$char_traits@D@std@@@std@@@8 DATA + ??_R0?AV?$basic_istream@DU?$char_traits@D@std@@@std@@@8 DATA + ??_R0?AV?$basic_ostream@DU?$char_traits@D@std@@@std@@@8 DATA + ??_R0?AV?$basic_streambuf@DU?$char_traits@D@std@@@std@@@8 DATA + ??_R0?AV?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@8 DATA + ??_R0?AV?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@8 DATA + ??_R0?AV?$codecvt@_WDU_Mbstatet@@@std@@@8 DATA + ??_R0?AV?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@@8 DATA + ??_R0?AV?$collate@D@std@@@8 DATA + ??_R0?AV?$wstring_convert@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@_WV?$allocator@_W@2@V?$allocator@D@2@@std@@@8 DATA + ??_R0?AVLLM@@@8 DATA + ??_R0?AV_Facet_base@std@@@8 DATA + ??_R0?AV_Generic_error_category@std@@@8 DATA + ??_R0?AV_Node_assert@std@@@8 DATA + ??_R0?AV_Node_back@std@@@8 DATA + ??_R0?AV_Node_base@std@@@8 DATA + ??_R0?AV_Node_capture@std@@@8 DATA + ??_R0?AV_Node_end_group@std@@@8 DATA + ??_R0?AV_Node_end_rep@std@@@8 DATA + ??_R0?AV_Node_endif@std@@@8 DATA + ??_R0?AV_Node_if@std@@@8 DATA + ??_R0?AV_Node_rep@std@@@8 DATA + ??_R0?AV_Root_node@std@@@8 DATA + ??_R0?AV_System_error@std@@@8 DATA + ??_R0?AVbad_alloc@std@@@8 DATA + ??_R0?AVbad_array_new_length@std@@@8 DATA + ??_R0?AVbad_cast@std@@@8 DATA + ??_R0?AVcodecvt_base@std@@@8 DATA + ??_R0?AVdollyv2_llm@@@8 DATA + ??_R0?AVerror_category@std@@@8 DATA + ??_R0?AVexception@std@@@8 DATA + ??_R0?AVfacet@locale@std@@@8 DATA + ??_R0?AVfalcon_llm@@@8 DATA + ??_R0?AVgpt2_llm@@@8 DATA + ??_R0?AVgpt_neox_llm@@@8 DATA + ??_R0?AVgptj_llm@@@8 DATA + ??_R0?AVios_base@std@@@8 DATA + ??_R0?AVllama_llm@@@8 DATA + ??_R0?AVmpt_llm@@@8 DATA + ??_R0?AVrange_error@std@@@8 DATA + ??_R0?AVreplit_llm@@@8 DATA + ??_R0?AVruntime_error@std@@@8 DATA + ??_R0?AVstarcoder_llm@@@8 DATA + ??_R0?AVsystem_error@std@@@8 DATA + ?_OptionsStorage@?1??__local_stdio_printf_options@@9@4_KA DATA + ?_Psave@?$_Facetptr@V?$codecvt@DDU_Mbstatet@@@std@@@std@@2PEBVfacet@locale@2@EB DATA + ?_Psave@?$_Facetptr@V?$collate@D@std@@@std@@2PEBVfacet@locale@2@EB DATA + ?_Psave@?$_Facetptr@V?$ctype@D@std@@@std@@2PEBVfacet@locale@2@EB DATA + ?_Static@?1???$_Immortalize_memcpy_image@V_Generic_error_category@std@@@std@@YAAEBV_Generic_error_category@1@XZ@4U?$_Constexpr_immortalize_impl@V_Generic_error_category@std@@@1@A DATA + ?_Stinit@?1??_Init@?$basic_filebuf@DU?$char_traits@D@std@@@std@@IEAAXPEAU_iobuf@@W4_Initfl@23@@Z@4U_Mbstatet@@A DATA + ?ws_symbol@@3V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@A DATA + ??$?0$$QEAH$$Z$$V@?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@Upiecewise_construct_t@1@V?$tuple@$$QEAH@1@V?$tuple@$$V@1@@Z + ??$?0$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$Z$$V@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@Upiecewise_construct_t@1@V?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@Z + ??$?0$$V@?$_Compressed_pair@U?$default_delete@Ufalcon_model_loader@@@std@@PEAUfalcon_model_loader@@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@U?$default_delete@Ullama_mmap@@@std@@PEAUllama_mmap@@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@U?$default_delete@Ullama_model_loader@@@std@@PEAUllama_model_loader@@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@D@std@@V?$_String_val@U?$_Simple_types@D@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@H@std@@V?$_Vector_val@U?$_Simple_types@H@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@I@std@@V?$_Vector_val@U?$_Simple_types@I@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@M@std@@V?$_Vector_val@U?$_Simple_types@M@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@N@std@@V?$_Vector_val@U?$_Simple_types@N@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@PEAUggml_tensor@@@std@@V?$_Vector_val@U?$_Simple_types@PEAUggml_tensor@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U?$pair@NH@std@@@std@@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@U_Loop_vals_t@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Loop_vals_t@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Udollyv2_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Udollyv2_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ufalcon_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ufalcon_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ugpt2_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugpt2_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ugpt_neox_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugpt_neox_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ugptj_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugptj_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ullama_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ullama_load_tensor@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_load_tensor@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ullama_load_tensor_shard@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_load_tensor_shard@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ullama_sp_bigram@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ullama_sp_symbol@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_sp_symbol@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ullama_token_data@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Umpt_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Umpt_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ureplit_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ureplit_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Ustarcoder_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ustarcoder_layer@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Utoken_score@llama_vocab@@@std@@V?$_Vector_val@U?$_Simple_types@Utoken_score@llama_vocab@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@V?$sub_match@PEBD@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$sub_match@PEBD@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@Vthread@std@@@std@@V?$_Vector_val@U?$_Simple_types@Vthread@std@@@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@_J@std@@V?$_Vector_val@U?$_Simple_types@_J@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0$$V@?$_Compressed_pair@V?$allocator@_W@std@@V?$_String_val@U?$_Simple_types@_W@std@@@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@@Z + ??$?0AEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@_N@std@@QEAA@AEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@1@$$QEA_N@Z + ??$?0AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N@std@@QEAA@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@$$QEA_N@Z + ??$?0AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N@std@@QEAA@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@$$QEA_N@Z + ??$?0AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@_N@std@@QEAA@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@1@$$QEA_N@Z + ??$?0AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@_N@std@@QEAA@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@1@$$QEA_N@Z + ??$?0AEAPEAUfalcon_file_loader@@@?$_Compressed_pair@U?$default_delete@Ufalcon_file_loader@@@std@@PEAUfalcon_file_loader@@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@AEAPEAUfalcon_file_loader@@@Z + ??$?0AEAPEAUfalcon_model_loader@@@?$_Compressed_pair@U?$default_delete@Ufalcon_model_loader@@@std@@PEAUfalcon_model_loader@@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@AEAPEAUfalcon_model_loader@@@Z + ??$?0AEAPEAUllama_file_loader@@@?$_Compressed_pair@U?$default_delete@Ullama_file_loader@@@std@@PEAUllama_file_loader@@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@AEAPEAUllama_file_loader@@@Z + ??$?0AEAPEAUllama_model_loader@@@?$_Compressed_pair@U?$default_delete@Ullama_model_loader@@@std@@PEAUllama_model_loader@@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@AEAPEAUllama_model_loader@@@Z + ??$?0AEAPEAUllama_token_data@@AEAPEAU0@$0A@@?$pair@PEAUllama_token_data@@PEAU1@@std@@QEAA@AEAPEAUllama_token_data@@0@Z + ??$?0AEAPEAV_Facet_base@std@@@?$_Compressed_pair@U?$default_delete@V_Facet_base@std@@@std@@PEAV_Facet_base@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@AEAPEAV_Facet_base@1@@Z + ??$?0AEAPEAV_Node_assert@std@@@?$_Compressed_pair@U?$default_delete@V_Node_assert@std@@@std@@PEAV_Node_assert@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@AEAPEAV_Node_assert@1@@Z + ??$?0AEAPEA_KAEAPEA_K$0A@@?$pair@PEA_KPEA_K@std@@QEAA@AEAPEA_K0@Z + ??$?0AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$?0AEAV?$allocator@D@std@@$$V@?$_Compressed_pair@V?$allocator@D@std@@V?$_String_val@U?$_Simple_types@D@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEAV?$allocator@D@1@@Z + ??$?0AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAPEAUggml_tensor@@$0A@@?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEAPEAUggml_tensor@@@Z + ??$?0AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEA_K$0A@@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@QEAA@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEA_K@Z + ??$?0AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEA_K@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEA_K@Z + ??$?0AEAV?$vector@HV?$allocator@H@std@@@std@@AEAM$0A@@?$pair@V?$vector@HV?$allocator@H@std@@@std@@M@std@@QEAA@AEAV?$vector@HV?$allocator@H@std@@@1@AEAM@Z + ??$?0AEA_K@?$_Tuple_val@_K@std@@QEAA@AEA_K@Z + ??$?0AEA_KM$0A@@?$pair@_KM@std@@QEAA@AEA_K$$QEAM@Z + ??$?0AEBH$$Z$$V@?$pair@$$CBHH@std@@QEAA@Upiecewise_construct_t@1@V?$tuple@AEBH@1@V?$tuple@$$V@1@@Z + ??$?0AEBH$$Z$$V@?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@Upiecewise_construct_t@1@V?$tuple@AEBH@1@V?$tuple@$$V@1@@Z + ??$?0AEBH$0A@@?$tuple@AEBH@std@@QEAA@AEBH@Z + ??$?0AEBH@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@HPEAX@std@@@1@AEBH@Z + ??$?0AEBH@?$_Tuple_val@AEBH@std@@QEAA@AEBH@Z + ??$?0AEBQEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@_N@std@@QEAA@AEBQEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@$$QEA_N@Z + ??$?0AEBQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N@std@@QEAA@AEBQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@$$QEA_N@Z + ??$?0AEBQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N@std@@QEAA@AEBQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@$$QEA_N@Z + ??$?0AEBU?$less@H@std@@U_Zero_then_variadic_args_t@1@@?$_Compressed_pair@U?$less@H@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@2@$00@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBU?$less@H@1@$$QEAU_Zero_then_variadic_args_t@1@@Z + ??$?0AEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@U_Zero_then_variadic_args_t@1@@?$_Compressed_pair@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@$00@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAU_Zero_then_variadic_args_t@1@@Z + ??$?0AEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@U_Zero_then_variadic_args_t@1@@?$_Compressed_pair@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAU_Zero_then_variadic_args_t@1@@Z + ??$?0AEBU?$less@W4e_model@@@std@@U_Zero_then_variadic_args_t@1@@?$_Compressed_pair@U?$less@W4e_model@@@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@2@$00@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBU?$less@W4e_model@@@1@$$QEAU_Zero_then_variadic_args_t@1@@Z + ??$?0AEBU?$less@W4falcon_e_model@@@std@@U_Zero_then_variadic_args_t@1@@?$_Compressed_pair@U?$less@W4falcon_e_model@@@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@2@$00@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBU?$less@W4falcon_e_model@@@1@$$QEAU_Zero_then_variadic_args_t@1@@Z + ??$?0AEBU?$pair@$$CBW4e_model@@_K@std@@@?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@1@AEBU?$pair@$$CBW4e_model@@_K@1@@Z + ??$?0AEBU?$pair@$$CBW4falcon_e_model@@_K@std@@@?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@1@AEBU?$pair@$$CBW4falcon_e_model@@_K@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@$$QEAH@1@V?$tuple@$$V@1@@?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@$$QEAH@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@AEBH@1@V?$tuple@$$V@1@@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBH@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@AEBH@1@V?$tuple@$$V@1@@?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBH@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBUpiecewise_construct_t@std@@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$?0AEBV?$allocator@D@std@@$$V@?$_Compressed_pair@V?$allocator@D@std@@V?$_String_val@U?$_Simple_types@D@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@D@1@@Z + ??$?0AEBV?$allocator@D@std@@$$V@?$_Compressed_pair@V?$allocator@D@std@@V?$_Vector_val@U?$_Simple_types@D@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@D@1@@Z + ??$?0AEBV?$allocator@E@std@@$$V@?$_Compressed_pair@V?$allocator@E@std@@V?$_Vector_val@U?$_Simple_types@E@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@E@1@@Z + ??$?0AEBV?$allocator@H@std@@$$V@?$_Compressed_pair@V?$allocator@H@std@@V?$_Vector_val@U?$_Simple_types@H@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@H@1@@Z + ??$?0AEBV?$allocator@H@std@@$$V@?$_Compressed_pair@V?$allocator@U?$_List_node@HPEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@H@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@H@1@@Z + ??$?0AEBV?$allocator@H@std@@$$V@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@H@1@@Z + ??$?0AEBV?$allocator@H@std@@$0A@@?$_Hash_vec@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEAA@AEBV?$allocator@H@1@@Z + ??$?0AEBV?$allocator@I@std@@$$V@?$_Compressed_pair@V?$allocator@I@std@@V?$_Vector_val@U?$_Simple_types@I@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@I@1@@Z + ??$?0AEBV?$allocator@M@std@@$$V@?$_Compressed_pair@V?$allocator@M@std@@V?$_Vector_val@U?$_Simple_types@M@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@M@1@@Z + ??$?0AEBV?$allocator@N@std@@$$V@?$_Compressed_pair@V?$allocator@N@std@@V?$_Vector_val@U?$_Simple_types@N@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@N@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBHH@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBHH@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBHH@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBHH@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBHH@std@@@std@@$0A@@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBHH@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@$0A@@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@$0A@@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@$0A@@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@1@@Z + ??$?0AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@$0A@@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@1@@Z + ??$?0AEBV?$allocator@Ullama_buffer@@@std@@$$V@?$_Compressed_pair@V?$allocator@Ullama_buffer@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_buffer@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@Ullama_buffer@@@1@@Z + ??$?0AEBV?$allocator@_J@std@@$$V@?$_Compressed_pair@V?$allocator@_J@std@@V?$_Vector_val@U?$_Simple_types@_J@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@_J@1@@Z + ??$?0AEBV?$allocator@_K@std@@$$V@?$_Compressed_pair@V?$allocator@_K@std@@V?$_Vector_val@U?$_Simple_types@_K@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@AEBV?$allocator@_K@1@@Z + ??$?0AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$Z$$V@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@QEAA@Upiecewise_construct_t@1@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@Z + ??$?0AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$Z$$V@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@Upiecewise_construct_t@1@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@Z + ??$?0AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$Z$$V@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@QEAA@Upiecewise_construct_t@1@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@Z + ??$?0AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$Z$$V@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@QEAA@Upiecewise_construct_t@1@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V?$tuple@$$V@1@@Z + ??$?0AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$0A@@?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$?0AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tuple_val@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$?0AEBW4ggml_type@@@?$_Tuple_val@W4ggml_type@@@std@@QEAA@AEBW4ggml_type@@@Z + ??$?0H$$V$0A@@?$tuple@$$QEAH@std@@QEAA@$$QEAH@Z + ??$?0H@?$_Alloc_temporary2@V?$allocator@H@std@@@std@@QEAA@AEAV?$allocator@H@1@$$QEAH@Z + ??$?0H@?$_Tuple_val@$$QEAH@std@@QEAA@$$QEAH@Z + ??$?0H@?$allocator@U?$_List_node@HPEAX@std@@@std@@QEAA@AEBV?$allocator@H@1@@Z + ??$?0H@?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@QEAA@AEBV?$allocator@H@1@@Z + ??$?0M@?$_Compressed_pair@U?$equal_to@H@std@@M$00@std@@QEAA@U_Zero_then_variadic_args_t@1@$$QEAM@Z + ??$?0M@?$_Compressed_pair@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@M$00@std@@QEAA@U_Zero_then_variadic_args_t@1@$$QEAM@Z + ??$?0NAEAH$0A@@?$pair@NH@std@@QEAA@$$QEANAEAH@Z + ??$?0NH$0A@@?$pair@NH@std@@QEAA@XZ + ??$?0PEAD$0A@@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@PEAD0AEBV?$allocator@D@1@@Z + ??$?0PEAD$0A@@?$vector@DV?$allocator@D@std@@@std@@QEAA@PEAD0AEBV?$allocator@D@1@@Z + ??$?0PEAE@?$_Tuple_val@PEAE@std@@QEAA@$$QEAPEAE@Z + ??$?0PEAM@?$_Tuple_val@PEAM@std@@QEAA@$$QEAPEAM@Z + ??$?0PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@_N@std@@QEAA@$$QEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@1@$$QEA_N@Z + ??$?0PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N@std@@QEAA@$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@$$QEA_N@Z + ??$?0PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N@std@@QEAA@$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@$$QEA_N@Z + ??$?0PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@_N@std@@QEAA@$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@1@$$QEA_N@Z + ??$?0PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@_N@std@@QEAA@$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@1@$$QEA_N@Z + ??$?0PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@_N@std@@QEAA@$$QEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@$$QEA_N@Z + ??$?0PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N@std@@QEAA@$$QEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@$$QEA_N@Z + ??$?0PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N$0A@@?$pair@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N@std@@QEAA@$$QEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@$$QEA_N@Z + ??$?0PEBD$0A@@?$vector@DV?$allocator@D@std@@@std@@QEAA@PEBD0AEBV?$allocator@D@1@@Z + ??$?0PEBDPEBD$0A@@?$pair@PEBDPEBD@std@@QEAA@XZ + ??$?0PEBH$0A@@?$vector@HV?$allocator@H@std@@@std@@QEAA@PEBH0AEBV?$allocator@H@1@@Z + ??$?0U?$char_traits@D@std@@V?$allocator@D@1@@?$basic_regex@DV?$regex_traits@D@std@@@std@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@W4syntax_option_type@regex_constants@1@@Z + ??$?0U?$default_delete@Ufalcon_file_loader@@@std@@$0A@@?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@QEAA@$$QEAV01@@Z + ??$?0U?$default_delete@Ufalcon_file_loader@@@std@@$0A@@?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@QEAA@PEAUfalcon_file_loader@@@Z + ??$?0U?$default_delete@Ufalcon_file_loader@@@std@@PEAUfalcon_file_loader@@@?$_Compressed_pair@U?$default_delete@Ufalcon_file_loader@@@std@@PEAUfalcon_file_loader@@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAU?$default_delete@Ufalcon_file_loader@@@1@$$QEAPEAUfalcon_file_loader@@@Z + ??$?0U?$default_delete@Ufalcon_model_loader@@@std@@$0A@@?$unique_ptr@Ufalcon_model_loader@@U?$default_delete@Ufalcon_model_loader@@@std@@@std@@QEAA@PEAUfalcon_model_loader@@@Z + ??$?0U?$default_delete@Ufalcon_model_loader@@@std@@$0A@@?$unique_ptr@Ufalcon_model_loader@@U?$default_delete@Ufalcon_model_loader@@@std@@@std@@QEAA@XZ + ??$?0U?$default_delete@Ullama_file_loader@@@std@@$0A@@?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@QEAA@$$QEAV01@@Z + ??$?0U?$default_delete@Ullama_file_loader@@@std@@$0A@@?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@QEAA@PEAUllama_file_loader@@@Z + ??$?0U?$default_delete@Ullama_file_loader@@@std@@PEAUllama_file_loader@@@?$_Compressed_pair@U?$default_delete@Ullama_file_loader@@@std@@PEAUllama_file_loader@@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAU?$default_delete@Ullama_file_loader@@@1@$$QEAPEAUllama_file_loader@@@Z + ??$?0U?$default_delete@Ullama_mmap@@@std@@$0A@@?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@std@@QEAA@XZ + ??$?0U?$default_delete@Ullama_model_loader@@@std@@$0A@@?$unique_ptr@Ullama_model_loader@@U?$default_delete@Ullama_model_loader@@@std@@@std@@QEAA@PEAUllama_model_loader@@@Z + ??$?0U?$default_delete@Ullama_model_loader@@@std@@$0A@@?$unique_ptr@Ullama_model_loader@@U?$default_delete@Ullama_model_loader@@@std@@@std@@QEAA@XZ + ??$?0U?$default_delete@V_Facet_base@std@@@std@@$0A@@?$unique_ptr@V_Facet_base@std@@U?$default_delete@V_Facet_base@std@@@2@@std@@QEAA@PEAV_Facet_base@1@@Z + ??$?0U?$default_delete@V_Node_assert@std@@@std@@$0A@@?$unique_ptr@V_Node_assert@std@@U?$default_delete@V_Node_assert@std@@@2@@std@@QEAA@$$QEAV01@@Z + ??$?0U?$default_delete@V_Node_assert@std@@@std@@$0A@@?$unique_ptr@V_Node_assert@std@@U?$default_delete@V_Node_assert@std@@@2@@std@@QEAA@PEAV_Node_assert@1@@Z + ??$?0U?$default_delete@V_Node_assert@std@@@std@@PEAV_Node_assert@1@@?$_Compressed_pair@U?$default_delete@V_Node_assert@std@@@std@@PEAV_Node_assert@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAU?$default_delete@V_Node_assert@std@@@1@$$QEAPEAV_Node_assert@1@@Z + ??$?0U?$pair@$$CBHH@std@@@?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBHH@std@@@1@@Z + ??$?0U?$pair@$$CBHH@std@@@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBHH@std@@@1@@Z + ??$?0U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@1@@Z + ??$?0U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@1@@Z + ??$?0U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@@Z + ??$?0U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@@Z + ??$?0U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@1@@Z + ??$?0U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@1@@Z + ??$?0U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@1@@Z + ??$?0U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@1@@Z + ??$?0U_Exact_args_t@std@@$0A@@?$tuple@$$V@std@@QEAA@U_Exact_args_t@1@@Z + ??$?0U_Exact_args_t@std@@AEA_K$$V$0A@@?$tuple@_K@std@@QEAA@U_Exact_args_t@1@AEA_K@Z + ??$?0U_Exact_args_t@std@@AEBH$$V$0A@@?$tuple@AEBH@std@@QEAA@U_Exact_args_t@1@AEBH@Z + ??$?0U_Exact_args_t@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@$$V$0A@@?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@U_Exact_args_t@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$?0U_Exact_args_t@std@@AEBW4ggml_type@@PEAEPEAMAEA_K$0A@@?$tuple@W4ggml_type@@PEAEPEAM_K@std@@QEAA@U_Exact_args_t@1@AEBW4ggml_type@@$$QEAPEAE$$QEAPEAMAEA_K@Z + ??$?0U_Exact_args_t@std@@H$$V$0A@@?$tuple@$$QEAH@std@@QEAA@U_Exact_args_t@1@$$QEAH@Z + ??$?0U_Exact_args_t@std@@PEAEPEAMAEA_K$0A@@?$tuple@PEAEPEAM_K@std@@QEAA@U_Exact_args_t@1@$$QEAPEAE$$QEAPEAMAEA_K@Z + ??$?0U_Exact_args_t@std@@PEAMAEA_K$0A@@?$tuple@PEAM_K@std@@QEAA@U_Exact_args_t@1@$$QEAPEAMAEA_K@Z + ??$?0U_Exact_args_t@std@@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@$$V$0A@@?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@U_Exact_args_t@1@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$?0U_Zero_then_variadic_args_t@std@@M@?$_Compressed_pair@U?$hash@H@std@@V?$_Compressed_pair@U?$equal_to@H@std@@M$00@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@$$QEAU21@$$QEAM@Z + ??$?0U_Zero_then_variadic_args_t@std@@M@?$_Compressed_pair@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Compressed_pair@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@M$00@2@$00@std@@QEAA@U_Zero_then_variadic_args_t@1@$$QEAU21@$$QEAM@Z + ??$?0V?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@$0A@@?$vector@HV?$allocator@H@std@@@std@@QEAA@V?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@1@0AEBV?$allocator@H@1@@Z + ??$?0V?$_List_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@_N$0A@@?$pair@V?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@_N@std@@QEAA@$$QEAV?$_List_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@1@$$QEA_N@Z + ??$?0V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@_N$0A@@?$pair@V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@_N@std@@QEAA@$$QEAV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@1@$$QEA_N@Z + ??$?0V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@_N$0A@@?$pair@V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@_N@std@@QEAA@$$QEAV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@1@$$QEA_N@Z + ??$?0V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@$0A@@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@1@0AEBV?$allocator@D@1@@Z + ??$?0V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V01@$0A@@?$pair@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V12@@std@@QEAA@XZ + ??$?0V?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@$0A@@?$vector@DV?$allocator@D@std@@@std@@QEAA@V?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@1@0AEBV?$allocator@D@1@@Z + ??$?0V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@$0A@@?$vector@HV?$allocator@H@std@@@std@@QEAA@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@1@0AEBV?$allocator@H@1@@Z + ??$?0V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@$0A@@?$vector@NV?$allocator@N@std@@@std@@QEAA@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@1@0AEBV?$allocator@N@1@@Z + ??$?0V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@@?$discrete_distribution@H@std@@QEAA@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@1@0@Z + ??$?0V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@@param_type@?$discrete_distribution@H@std@@QEAA@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@2@0@Z + ??$?0V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@$0A@@?$vector@NV?$allocator@N@std@@@std@@QEAA@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@1@0AEBV?$allocator@N@1@@Z + ??$?0V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@@?$discrete_distribution@H@std@@QEAA@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@1@0@Z + ??$?0V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@@param_type@?$discrete_distribution@H@std@@QEAA@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@2@0@Z + ??$?0V?$allocator@D@std@@$$V@?$_Compressed_pair@V?$allocator@D@std@@V?$_String_val@U?$_Simple_types@D@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@D@1@@Z + ??$?0V?$allocator@H@std@@$$V@?$_Compressed_pair@V?$allocator@H@std@@V?$_Vector_val@U?$_Simple_types@H@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@H@1@@Z + ??$?0V?$allocator@H@std@@PEAHPEAHPEAH@?$_Compressed_pair@V?$allocator@H@std@@V?$_Vector_val@U?$_Simple_types@H@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@H@1@$$QEAPEAH22@Z + ??$?0V?$allocator@I@std@@$$V@?$_Compressed_pair@V?$allocator@I@std@@V?$_Vector_val@U?$_Simple_types@I@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@I@1@@Z + ??$?0V?$allocator@I@std@@PEAIPEAIPEAI@?$_Compressed_pair@V?$allocator@I@std@@V?$_Vector_val@U?$_Simple_types@I@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@I@1@$$QEAPEAI22@Z + ??$?0V?$allocator@PEAUggml_tensor@@@std@@PEAPEAUggml_tensor@@PEAPEAU2@PEAPEAU2@@?$_Compressed_pair@V?$allocator@PEAUggml_tensor@@@std@@V?$_Vector_val@U?$_Simple_types@PEAUggml_tensor@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@PEAUggml_tensor@@@1@$$QEAPEAPEAUggml_tensor@@22@Z + ??$?0V?$allocator@U?$_List_node@HPEAX@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@U?$_List_node@HPEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@H@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@U?$_List_node@HPEAX@std@@@1@@Z + ??$?0V?$allocator@U?$_List_node@HPEAX@std@@@std@@@?$list@HV?$allocator@H@std@@@std@@AEAA@U_Move_allocator_tag@1@AEAV?$allocator@U?$_List_node@HPEAX@std@@@1@@Z + ??$?0V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@1@@Z + ??$?0V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@1@@Z + ??$?0V?$allocator@Ullama_load_tensor_shard@@@std@@PEAUllama_load_tensor_shard@@PEAU2@PEAU2@@?$_Compressed_pair@V?$allocator@Ullama_load_tensor_shard@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_load_tensor_shard@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@Ullama_load_tensor_shard@@@1@$$QEAPEAUllama_load_tensor_shard@@22@Z + ??$?0V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@$$V@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@1@@Z + ??$?0V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@$0A@@?$_Hash_vec@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEAA@$$QEAV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@1@@Z + ??$?0V?$allocator@_W@std@@$$V@?$_Compressed_pair@V?$allocator@_W@std@@V?$_String_val@U?$_Simple_types@_W@std@@@2@$00@std@@QEAA@U_One_then_variadic_args_t@1@$$QEAV?$allocator@_W@1@@Z + ??$?0V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$V$0A@@?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$?0V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tuple_val@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$?0V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@$0A@@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@AEBU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$?0V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@@locale@std@@QEAA@AEBV01@PEBV?$codecvt_utf8@_W$0BAPPPP@$0A@@1@@Z + ??$?0V?$tuple@$$QEAH@std@@V?$tuple@$$V@1@$0A@$$Z$S@?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@AEAV?$tuple@$$QEAH@1@AEAV?$tuple@$$V@1@U?$integer_sequence@_K$0A@@1@U?$integer_sequence@_K$S@1@@Z + ??$?0V?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$tuple@$$V@1@$0A@$$Z$S@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@AEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@AEAV?$tuple@$$V@1@U?$integer_sequence@_K$0A@@1@U?$integer_sequence@_K$S@1@@Z + ??$?0V?$tuple@AEBH@std@@V?$tuple@$$V@1@$0A@$$Z$S@?$pair@$$CBHH@std@@QEAA@AEAV?$tuple@AEBH@1@AEAV?$tuple@$$V@1@U?$integer_sequence@_K$0A@@1@U?$integer_sequence@_K$S@1@@Z + ??$?0V?$tuple@AEBH@std@@V?$tuple@$$V@1@$0A@$$Z$S@?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@AEAV?$tuple@AEBH@1@AEAV?$tuple@$$V@1@U?$integer_sequence@_K$0A@@1@U?$integer_sequence@_K$S@1@@Z + ??$?0V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$tuple@$$V@1@$0A@$$Z$S@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@QEAA@AEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@AEAV?$tuple@$$V@1@U?$integer_sequence@_K$0A@@1@U?$integer_sequence@_K$S@1@@Z + ??$?0V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$tuple@$$V@1@$0A@$$Z$S@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@AEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@AEAV?$tuple@$$V@1@U?$integer_sequence@_K$0A@@1@U?$integer_sequence@_K$S@1@@Z + ??$?0V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$tuple@$$V@1@$0A@$$Z$S@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@QEAA@AEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@AEAV?$tuple@$$V@1@U?$integer_sequence@_K$0A@@1@U?$integer_sequence@_K$S@1@@Z + ??$?0V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$tuple@$$V@1@$0A@$$Z$S@?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@QEAA@AEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@AEAV?$tuple@$$V@1@U?$integer_sequence@_K$0A@@1@U?$integer_sequence@_K$S@1@@Z + ??$?0V?$vector@HV?$allocator@H@std@@@std@@M$0A@@?$pair@V?$vector@HV?$allocator@H@std@@@std@@M@std@@QEAA@$$QEAV?$vector@HV?$allocator@H@std@@@1@$$QEAM@Z + ??$?0W4e_model@@_K$0A@@?$pair@$$CBW4e_model@@_K@std@@QEAA@$$QEAW4e_model@@$$QEA_K@Z + ??$?0W4falcon_e_model@@_K$0A@@?$pair@$$CBW4falcon_e_model@@_K@std@@QEAA@$$QEAW4falcon_e_model@@$$QEA_K@Z + ??$?0_KM$0A@@?$pair@_KM@std@@QEAA@XZ + ??$?0_N@?$allocator@I@std@@QEAA@AEBV?$allocator@_N@1@@Z + ??$?4U?$default_delete@Ullama_mmap@@@std@@$0A@@?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@std@@QEAAAEAV01@$$QEAV01@@Z + ??$?4U?$pair@NH@std@@$0A@@?$pair@NH@std@@QEAAAEAU01@$$QEAU01@@Z + ??$?4U?$pair@_KM@std@@$0A@@?$pair@_KM@std@@QEAAAEAU01@$$QEAU01@@Z + ??$?5DU?$char_traits@D@std@@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@YAAEAV?$basic_istream@DU?$char_traits@D@std@@@0@AEAV10@AEAV?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@0@@Z + ??$?6DU?$char_traits@D@std@@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@YAAEAV?$basic_ostream@DU?$char_traits@D@std@@@0@AEAV10@AEBV?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@0@@Z + ??$?6U?$char_traits@D@std@@@std@@YAAEAV?$basic_ostream@DU?$char_traits@D@std@@@0@AEAV10@D@Z + ??$?8DU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@0@Z + ??$?8DU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@QEBD@Z + ??$?8IV?$allocator@I@std@@@std@@YA_NAEBV?$vector@IV?$allocator@I@std@@@0@0@Z + ??$?9DU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@QEBD@Z + ??$?9IV?$allocator@I@std@@@std@@YA_NAEBV?$vector@IV?$allocator@I@std@@@0@0@Z + ??$?HDU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@$$QEAV10@0@Z + ??$?HDU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@$$QEAV10@QEBD@Z + ??$?HDU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEBV10@QEBD@Z + ??$?HDU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@QEBD$$QEAV10@@Z + ??$?HDU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@QEBDAEBV10@@Z + ??$?MDU?$char_traits@D@std@@V?$allocator@D@1@@std@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@0@Z + ??$?RAEAMAEAM@?$plus@X@std@@QEBAMAEAM0@Z + ??$?RAEBIAEBI@?$equal_to@X@std@@QEBA_NAEBI0@Z + ??$?RAEBNAEBN@?$less@X@std@@QEBA_NAEBN0@Z + ??$?RH@?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@QEBA_KAEBH@Z + ??$?RHH@?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@QEBA_NAEBH0@Z + ??$?RV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEBA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$?RV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V01@@?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEBA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@0@Z + ??$?RV?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@std@@@?$discrete_distribution@H@std@@QEBAHAEAV?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@1@@Z + ??$_Adl_verify_range@PEADPEAD@std@@YAXAEBQEAD0@Z + ??$_Adl_verify_range@PEAIPEAI@std@@YAXAEBQEAI0@Z + ??$_Adl_verify_range@PEANPEAN@std@@YAXAEBQEAN0@Z + ??$_Adl_verify_range@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@PEAU123@@std@@YAXAEBQEAU_Grp_t@?$_Tgt_state_t@PEBD@0@0@Z + ??$_Adl_verify_range@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@PEAU123@@std@@YAXAEBQEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@PEAUllama_token_data@@PEAU1@@std@@YAXAEBQEAUllama_token_data@@0@Z + ??$_Adl_verify_range@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@PEAV12@@std@@YAXAEBQEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@0@Z + ??$_Adl_verify_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@PEAV12@@std@@YAXAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@PEAV12@@std@@YAXAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@PEAV12@@std@@YAXAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@PEAV12@@std@@YAXAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@PEAV12@@std@@YAXAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@PEBDPEBD@std@@YAXAEBQEBD0@Z + ??$_Adl_verify_range@PEBHPEBH@std@@YAXAEBQEBH0@Z + ??$_Adl_verify_range@PEBIPEBI@std@@YAXAEBQEBI0@Z + ??$_Adl_verify_range@PEBU?$pair@$$CBW4e_model@@_K@std@@PEBU12@@std@@YAXAEBQEBU?$pair@$$CBW4e_model@@_K@0@0@Z + ??$_Adl_verify_range@PEBU?$pair@$$CBW4falcon_e_model@@_K@std@@PEBU12@@std@@YAXAEBQEBU?$pair@$$CBW4falcon_e_model@@_K@0@0@Z + ??$_Adl_verify_range@V?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@V12@@std@@YAXAEBV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@V12@@std@@YAXAEBV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@0@Z + ??$_Adl_verify_range@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V12@@std@@YAXAEBV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V12@@std@@YAXAEBV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@0@0@Z + ??$_Adl_verify_range@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@std@@V12@@std@@YAXAEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@0@0@Z + ??$_Allocate@$0BA@U_Default_allocate_traits@std@@$0A@@std@@YAPEAX_K@Z + ??$_Allocate_manually_vector_aligned@U_Default_allocate_traits@std@@@std@@YAPEAX_K@Z + ??$_Allocators_equal@V?$allocator@D@std@@@std@@YA_NAEBV?$allocator@D@0@0@Z + ??$_Assign_range@PEAI@?$vector@IV?$allocator@I@std@@@std@@AEAAXPEAI0Uforward_iterator_tag@1@@Z + ??$_Assign_range@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAXPEAU_Grp_t@?$_Tgt_state_t@PEBD@1@0Uforward_iterator_tag@1@@Z + ??$_Assign_range@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAXPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@0Uforward_iterator_tag@1@@Z + ??$_Assign_range@PEBI@?$vector@IV?$allocator@I@std@@@std@@AEAAXPEBI0Uforward_iterator_tag@1@@Z + ??$_Buyheadnode@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@SAPEAU01@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@@Z + ??$_Buyheadnode@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@SAPEAU01@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@@Z + ??$_Buyheadnode@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@SAPEAU01@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@@Z + ??$_Buyheadnode@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@SAPEAU01@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@@Z + ??$_Buyheadnode@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@SAPEAU01@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@@Z + ??$_Cmp_chrange@PEBDPEBDU?$_Cmp_collate@V?$regex_traits@D@std@@@std@@@std@@YAPEBDPEBD000U?$_Cmp_collate@V?$regex_traits@D@std@@@0@@Z + ??$_Cmp_chrange@PEBDPEBDU?$_Cmp_cs@V?$regex_traits@D@std@@@std@@@std@@YAPEBDPEBD000U?$_Cmp_cs@V?$regex_traits@D@std@@@0@@Z + ??$_Cmp_chrange@PEBDPEBDU?$_Cmp_icase@V?$regex_traits@D@std@@@std@@@std@@YAPEBDPEBD000U?$_Cmp_icase@V?$regex_traits@D@std@@@0@@Z + ??$_Cmp_chrange@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@PEBDU?$_Cmp_collate@V?$regex_traits@D@std@@@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@0PEBD1U?$_Cmp_collate@V?$regex_traits@D@std@@@0@@Z + ??$_Cmp_chrange@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@PEBDU?$_Cmp_cs@V?$regex_traits@D@std@@@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@0PEBD1U?$_Cmp_cs@V?$regex_traits@D@std@@@0@@Z + ??$_Cmp_chrange@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@PEBDU?$_Cmp_icase@V?$regex_traits@D@std@@@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@0PEBD1U?$_Cmp_icase@V?$regex_traits@D@std@@@0@@Z + ??$_Cmp_chrange@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V12@U?$_Cmp_collate@V?$regex_traits@D@std@@@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@000U?$_Cmp_collate@V?$regex_traits@D@std@@@0@@Z + ??$_Cmp_chrange@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V12@U?$_Cmp_cs@V?$regex_traits@D@std@@@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@000U?$_Cmp_cs@V?$regex_traits@D@std@@@0@@Z + ??$_Cmp_chrange@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V12@U?$_Cmp_icase@V?$regex_traits@D@std@@@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@000U?$_Cmp_icase@V?$regex_traits@D@std@@@0@@Z + ??$_Codecvt_do_length@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@DU_Mbstatet@@@std@@YAHAEBV?$codecvt_utf8@_W$0BAPPPP@$0A@@0@AEAU_Mbstatet@@PEBD2_K@Z + ??$_Compare@PEBDPEBDV?$regex_traits@D@std@@@std@@YAPEBDPEBD000AEBV?$regex_traits@D@0@W4syntax_option_type@regex_constants@0@@Z + ??$_Compare@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@PEBDV?$regex_traits@D@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@0PEBD1AEBV?$regex_traits@D@0@W4syntax_option_type@regex_constants@0@@Z + ??$_Compare@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V12@V?$regex_traits@D@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@000AEBV?$regex_traits@D@0@W4syntax_option_type@regex_constants@0@@Z + ??$_Construct_in_place@PEADAEAPEAD@std@@YAXAEAPEAD0@Z + ??$_Construct_in_place@PEADAEBQEAD@std@@YAXAEAPEADAEBQEAD@Z + ??$_Construct_in_place@PEAU?$_List_node@HPEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_List_node@HPEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_List_node@HPEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_List_node@HPEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@AEAPEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@0@Z + ??$_Construct_in_place@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@AEBQEAU12@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@AEBQEAU10@@Z + ??$_Construct_in_place@PEA_WAEAPEA_W@std@@YAXAEAPEA_W0@Z + ??$_Construct_in_place@PEA_WAEBQEA_W@std@@YAXAEAPEA_WAEBQEA_W@Z + ??$_Construct_in_place@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@AEBV12@@std@@YAXAEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@AEBV10@@Z + ??$_Construct_in_place@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@AEBV12@@std@@YAXAEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@AEBV10@@Z + ??$_Construct_in_place@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@AEBV12@@std@@YAXAEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@AEBV10@@Z + ??$_Construct_in_place@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@AEBV12@@std@@YAXAEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@AEBV10@@Z + ??$_Construct_in_place@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@AEBV12@@std@@YAXAEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@AEBV10@@Z + ??$_Construct_in_place@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@AEBV12@@std@@YAXAEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@AEBV10@@Z + ??$_Construct_n_copies_of_ty@H@?$vector@HV?$allocator@H@std@@@std@@AEAAX_KAEBH@Z + ??$_Construct_n_copies_of_ty@I@?$vector@IV?$allocator@I@std@@@std@@AEAAX_KAEBI@Z + ??$_Construct_n_copies_of_ty@M@?$vector@MV?$allocator@M@std@@@std@@AEAAX_KAEBM@Z + ??$_Construct_n_copies_of_ty@U_Value_init_tag@std@@@?$vector@DV?$allocator@D@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Construct_n_copies_of_ty@U_Value_init_tag@std@@@?$vector@EV?$allocator@E@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Construct_n_copies_of_ty@U_Value_init_tag@std@@@?$vector@MV?$allocator@M@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Construct_n_copies_of_ty@U_Value_init_tag@std@@@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Construct_n_copies_of_ty@U_Value_init_tag@std@@@?$vector@_KV?$allocator@_K@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Construct_n_copies_of_ty@_J@?$vector@_JV?$allocator@_J@std@@@std@@AEAAX_KAEB_J@Z + ??$_Convert_size@_K@std@@YA_K_K@Z + ??$_Copy_backward_memmove@PEAHPEAH@std@@YAPEAHPEAH00@Z + ??$_Copy_backward_memmove@PEAUllama_token_data@@PEAU1@@std@@YAPEAUllama_token_data@@PEAU1@00@Z + ??$_Copy_backward_memmove@PEA_KPEA_K@std@@YAPEA_KPEA_K00@Z + ??$_Copy_backward_unchecked@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@V12@@std@@YA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@V10@00@Z + ??$_Copy_memmove@PEADPEAD@std@@YAPEADPEAD00@Z + ??$_Copy_memmove@PEAHPEAH@std@@YAPEAHPEAH00@Z + ??$_Copy_memmove@PEAIPEAI@std@@YAPEAIPEAI00@Z + ??$_Copy_memmove@PEAMPEAM@std@@YAPEAMPEAM00@Z + ??$_Copy_memmove@PEANPEAN@std@@YAPEANPEAN00@Z + ??$_Copy_memmove@PEAPEAUggml_tensor@@PEAPEAU1@@std@@YAPEAPEAUggml_tensor@@PEAPEAU1@00@Z + ??$_Copy_memmove@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@PEAU123@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@PEAU120@00@Z + ??$_Copy_memmove@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@PEAU123@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@PEAU120@00@Z + ??$_Copy_memmove@PEAU_Loop_vals_t@std@@PEAU12@@std@@YAPEAU_Loop_vals_t@0@PEAU10@00@Z + ??$_Copy_memmove@PEAUdollyv2_layer@@PEAU1@@std@@YAPEAUdollyv2_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEAUfalcon_layer@@PEAU1@@std@@YAPEAUfalcon_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEAUgpt2_layer@@PEAU1@@std@@YAPEAUgpt2_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEAUgpt_neox_layer@@PEAU1@@std@@YAPEAUgpt_neox_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEAUgptj_layer@@PEAU1@@std@@YAPEAUgptj_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEAUllama_layer@@PEAU1@@std@@YAPEAUllama_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEAUllama_sp_bigram@@PEAU1@@std@@YAPEAUllama_sp_bigram@@PEAU1@00@Z + ??$_Copy_memmove@PEAUllama_sp_symbol@@PEAU1@@std@@YAPEAUllama_sp_symbol@@PEAU1@00@Z + ??$_Copy_memmove@PEAUllama_token_data@@PEAU1@@std@@YAPEAUllama_token_data@@PEAU1@00@Z + ??$_Copy_memmove@PEAUmpt_layer@@PEAU1@@std@@YAPEAUmpt_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEAUreplit_layer@@PEAU1@@std@@YAPEAUreplit_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEAUstarcoder_layer@@PEAU1@@std@@YAPEAUstarcoder_layer@@PEAU1@00@Z + ??$_Copy_memmove@PEA_JPEA_J@std@@YAPEA_JPEA_J00@Z + ??$_Copy_memmove@PEBDPEAD@std@@YAPEADPEBD0PEAD@Z + ??$_Copy_memmove@PEBHPEAH@std@@YAPEAHPEBH0PEAH@Z + ??$_Copy_memmove@PEBIPEAI@std@@YAPEAIPEBI0PEAI@Z + ??$_Copy_unchecked@PEAIPEAI@std@@YAPEAIPEAI00@Z + ??$_Copy_unchecked@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@PEAU123@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@PEAU120@00@Z + ??$_Copy_unchecked@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@PEAU123@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@PEAU120@00@Z + ??$_Copy_unchecked@PEAUllama_token_data@@PEAU1@@std@@YAPEAUllama_token_data@@PEAU1@00@Z + ??$_Copy_unchecked@PEBDV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YA?AV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@PEBD0V10@@Z + ??$_Copy_unchecked@PEBHPEAH@std@@YAPEAHPEBH0PEAH@Z + ??$_Copy_unchecked@PEBIPEAI@std@@YAPEAIPEBI0PEAI@Z + ??$_Copy_unchecked@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@V12@@std@@YA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@V10@00@Z + ??$_Deallocate@$0BA@$0A@@std@@YAXPEAX_K@Z + ??$_Destroy_in_place@PEAD@std@@YAXAEAPEAD@Z + ??$_Destroy_in_place@PEAU?$_List_node@HPEAX@std@@@std@@YAXAEAPEAU?$_List_node@HPEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@YAXAEAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@@Z + ??$_Destroy_in_place@PEA_W@std@@YAXAEAPEA_W@Z + ??$_Destroy_range@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@PEAV12@@std@@YAXPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@QEAV10@@Z + ??$_Destroy_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@PEAV12@@std@@YAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@QEAV10@@Z + ??$_Destroy_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@PEAV12@@std@@YAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@QEAV10@@Z + ??$_Destroy_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@PEAV12@@std@@YAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@QEAV10@@Z + ??$_Destroy_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@PEAV12@@std@@YAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@QEAV10@@Z + ??$_Destroy_range@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@PEAV12@@std@@YAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@QEAV10@@Z + ??$_Destroy_range@V?$allocator@D@std@@@std@@YAXPEADQEADAEAV?$allocator@D@0@@Z + ??$_Destroy_range@V?$allocator@E@std@@@std@@YAXPEAEQEAEAEAV?$allocator@E@0@@Z + ??$_Destroy_range@V?$allocator@H@std@@@std@@YAXPEAHQEAHAEAV?$allocator@H@0@@Z + ??$_Destroy_range@V?$allocator@I@std@@@std@@YAXPEAIQEAIAEAV?$allocator@I@0@@Z + ??$_Destroy_range@V?$allocator@M@std@@@std@@YAXPEAMQEAMAEAV?$allocator@M@0@@Z + ??$_Destroy_range@V?$allocator@N@std@@@std@@YAXPEANQEANAEAV?$allocator@N@0@@Z + ??$_Destroy_range@V?$allocator@PEAUggml_tensor@@@std@@@std@@YAXPEAPEAUggml_tensor@@QEAPEAU1@AEAV?$allocator@PEAUggml_tensor@@@0@@Z + ??$_Destroy_range@V?$allocator@U?$pair@NH@std@@@std@@@std@@YAXPEAU?$pair@NH@0@QEAU10@AEAV?$allocator@U?$pair@NH@std@@@0@@Z + ??$_Destroy_range@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@YAXPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@QEAU10@AEAV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@0@@Z + ??$_Destroy_range@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@YAXPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@QEAU120@AEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@0@@Z + ??$_Destroy_range@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@YAXPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@QEAU120@AEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@@Z + ??$_Destroy_range@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@YAXPEAU_Loop_vals_t@0@QEAU10@AEAV?$allocator@U_Loop_vals_t@std@@@0@@Z + ??$_Destroy_range@V?$allocator@Udollyv2_layer@@@std@@@std@@YAXPEAUdollyv2_layer@@QEAU1@AEAV?$allocator@Udollyv2_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ufalcon_layer@@@std@@@std@@YAXPEAUfalcon_layer@@QEAU1@AEAV?$allocator@Ufalcon_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ugpt2_layer@@@std@@@std@@YAXPEAUgpt2_layer@@QEAU1@AEAV?$allocator@Ugpt2_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ugpt_neox_layer@@@std@@@std@@YAXPEAUgpt_neox_layer@@QEAU1@AEAV?$allocator@Ugpt_neox_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ugptj_layer@@@std@@@std@@YAXPEAUgptj_layer@@QEAU1@AEAV?$allocator@Ugptj_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ullama_buffer@@@std@@@std@@YAXPEAUllama_buffer@@QEAU1@AEAV?$allocator@Ullama_buffer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ullama_layer@@@std@@@std@@YAXPEAUllama_layer@@QEAU1@AEAV?$allocator@Ullama_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ullama_load_tensor@@@std@@@std@@YAXPEAUllama_load_tensor@@QEAU1@AEAV?$allocator@Ullama_load_tensor@@@0@@Z + ??$_Destroy_range@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@YAXPEAUllama_load_tensor_shard@@QEAU1@AEAV?$allocator@Ullama_load_tensor_shard@@@0@@Z + ??$_Destroy_range@V?$allocator@Ullama_sp_bigram@@@std@@@std@@YAXPEAUllama_sp_bigram@@QEAU1@AEAV?$allocator@Ullama_sp_bigram@@@0@@Z + ??$_Destroy_range@V?$allocator@Ullama_sp_symbol@@@std@@@std@@YAXPEAUllama_sp_symbol@@QEAU1@AEAV?$allocator@Ullama_sp_symbol@@@0@@Z + ??$_Destroy_range@V?$allocator@Ullama_token_data@@@std@@@std@@YAXPEAUllama_token_data@@QEAU1@AEAV?$allocator@Ullama_token_data@@@0@@Z + ??$_Destroy_range@V?$allocator@Umpt_layer@@@std@@@std@@YAXPEAUmpt_layer@@QEAU1@AEAV?$allocator@Umpt_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ureplit_layer@@@std@@@std@@YAXPEAUreplit_layer@@QEAU1@AEAV?$allocator@Ureplit_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Ustarcoder_layer@@@std@@@std@@YAXPEAUstarcoder_layer@@QEAU1@AEAV?$allocator@Ustarcoder_layer@@@0@@Z + ??$_Destroy_range@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@YAXPEAUtoken_score@llama_vocab@@QEAU12@AEAV?$allocator@Utoken_score@llama_vocab@@@0@@Z + ??$_Destroy_range@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YAXPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@QEAV10@AEAV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@@Z + ??$_Destroy_range@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@YAXPEAV?$sub_match@PEBD@0@QEAV10@AEAV?$allocator@V?$sub_match@PEBD@std@@@0@@Z + ??$_Destroy_range@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@YAXPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@QEAV10@AEAV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@@Z + ??$_Destroy_range@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@YAXPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@0@QEAV10@AEAV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@0@@Z + ??$_Destroy_range@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@YAXPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@0@QEAV10@AEAV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@0@@Z + ??$_Destroy_range@V?$allocator@Vthread@std@@@std@@@std@@YAXPEAVthread@0@QEAV10@AEAV?$allocator@Vthread@std@@@0@@Z + ??$_Destroy_range@V?$allocator@_J@std@@@std@@YAXPEA_JQEA_JAEAV?$allocator@_J@0@@Z + ??$_Destroy_range@V?$allocator@_K@std@@@std@@YAXPEA_KQEA_KAEAV?$allocator@_K@0@@Z + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@D@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@E@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@I@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@M@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@N@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@U?$pair@NH@std@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Ugptj_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Ullama_buffer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Ullama_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Umpt_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Ureplit_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@Vthread@std@@@std@@@std@@QEAAXXZ + ??$_Emplace_back@$$V@?$_Uninitialized_backout_al@V?$allocator@_K@std@@@std@@QEAAXXZ + ??$_Emplace_back@AEAD@?$_Uninitialized_backout_al@V?$allocator@D@std@@@std@@QEAAXAEAD@Z + ??$_Emplace_back@AEAH@?$_Uninitialized_backout_al@V?$allocator@H@std@@@std@@QEAAXAEAH@Z + ??$_Emplace_back@AEAI@?$_Uninitialized_backout_al@V?$allocator@I@std@@@std@@QEAAXAEAI@Z + ??$_Emplace_back@AEAM@?$_Uninitialized_backout_al@V?$allocator@N@std@@@std@@QEAAXAEAM@Z + ??$_Emplace_back@AEAN@?$_Uninitialized_backout_al@V?$allocator@N@std@@@std@@QEAAXAEAN@Z + ??$_Emplace_back@AEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@QEAAXAEAU_Grp_t@?$_Tgt_state_t@PEBD@1@@Z + ??$_Emplace_back@AEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAAXAEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@@Z + ??$_Emplace_back@AEBD@?$_Uninitialized_backout_al@V?$allocator@D@std@@@std@@QEAAXAEBD@Z + ??$_Emplace_back@AEBH@?$_Uninitialized_backout_al@V?$allocator@H@std@@@std@@QEAAXAEBH@Z + ??$_Emplace_back@AEBI@?$_Uninitialized_backout_al@V?$allocator@I@std@@@std@@QEAAXAEBI@Z + ??$_Emplace_back@AEBM@?$_Uninitialized_backout_al@V?$allocator@M@std@@@std@@QEAAXAEBM@Z + ??$_Emplace_back@AEBN@?$_Uninitialized_backout_al@V?$allocator@N@std@@@std@@QEAAXAEBN@Z + ??$_Emplace_back@AEBV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@?$_Uninitialized_backout@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@QEAAXAEBV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@1@@Z + ??$_Emplace_back@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@QEAAXAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@1@@Z + ??$_Emplace_back@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@QEAAXAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@1@@Z + ??$_Emplace_back@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@QEAAXAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@1@@Z + ??$_Emplace_back@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@QEAAXAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@1@@Z + ??$_Emplace_back@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@QEAAXAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@1@@Z + ??$_Emplace_back@AEB_J@?$_Uninitialized_backout_al@V?$allocator@_J@std@@@std@@QEAAXAEB_J@Z + ??$_Emplace_back@D@?$_Uninitialized_backout_al@V?$allocator@D@std@@@std@@QEAAX$$QEAD@Z + ??$_Emplace_back@H@?$_Uninitialized_backout_al@V?$allocator@H@std@@@std@@QEAAX$$QEAH@Z + ??$_Emplace_back@I@?$_Uninitialized_backout_al@V?$allocator@I@std@@@std@@QEAAX$$QEAI@Z + ??$_Emplace_back@M@?$_Uninitialized_backout_al@V?$allocator@M@std@@@std@@QEAAX$$QEAM@Z + ??$_Emplace_back@N@?$_Uninitialized_backout_al@V?$allocator@N@std@@@std@@QEAAX$$QEAN@Z + ??$_Emplace_back@PEAUggml_tensor@@@?$_Uninitialized_backout_al@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAAX$$QEAPEAUggml_tensor@@@Z + ??$_Emplace_back@U?$pair@NH@std@@@?$_Uninitialized_backout_al@V?$allocator@U?$pair@NH@std@@@std@@@std@@QEAAX$$QEAU?$pair@NH@1@@Z + ??$_Emplace_back@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@?$_Uninitialized_backout_al@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAAX$$QEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$_Emplace_back@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@QEAAX$$QEAU_Grp_t@?$_Tgt_state_t@PEBD@1@@Z + ??$_Emplace_back@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAAX$$QEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@@Z + ??$_Emplace_back@U_Loop_vals_t@std@@@?$_Uninitialized_backout_al@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@QEAAX$$QEAU_Loop_vals_t@1@@Z + ??$_Emplace_back@Udollyv2_layer@@@?$_Uninitialized_backout_al@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAAX$$QEAUdollyv2_layer@@@Z + ??$_Emplace_back@Ufalcon_layer@@@?$_Uninitialized_backout_al@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAAX$$QEAUfalcon_layer@@@Z + ??$_Emplace_back@Ugpt2_layer@@@?$_Uninitialized_backout_al@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAAX$$QEAUgpt2_layer@@@Z + ??$_Emplace_back@Ugpt_neox_layer@@@?$_Uninitialized_backout_al@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAAX$$QEAUgpt_neox_layer@@@Z + ??$_Emplace_back@Ugptj_layer@@@?$_Uninitialized_backout_al@V?$allocator@Ugptj_layer@@@std@@@std@@QEAAX$$QEAUgptj_layer@@@Z + ??$_Emplace_back@Ullama_layer@@@?$_Uninitialized_backout_al@V?$allocator@Ullama_layer@@@std@@@std@@QEAAX$$QEAUllama_layer@@@Z + ??$_Emplace_back@Ullama_load_tensor@@@?$_Uninitialized_backout_al@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAAX$$QEAUllama_load_tensor@@@Z + ??$_Emplace_back@Ullama_load_tensor_shard@@@?$_Uninitialized_backout_al@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAAX$$QEAUllama_load_tensor_shard@@@Z + ??$_Emplace_back@Ullama_sp_bigram@@@?$_Uninitialized_backout_al@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAAX$$QEAUllama_sp_bigram@@@Z + ??$_Emplace_back@Ullama_sp_symbol@@@?$_Uninitialized_backout_al@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAAX$$QEAUllama_sp_symbol@@@Z + ??$_Emplace_back@Ullama_token_data@@@?$_Uninitialized_backout_al@V?$allocator@Ullama_token_data@@@std@@@std@@QEAAX$$QEAUllama_token_data@@@Z + ??$_Emplace_back@Umpt_layer@@@?$_Uninitialized_backout_al@V?$allocator@Umpt_layer@@@std@@@std@@QEAAX$$QEAUmpt_layer@@@Z + ??$_Emplace_back@Ureplit_layer@@@?$_Uninitialized_backout_al@V?$allocator@Ureplit_layer@@@std@@@std@@QEAAX$$QEAUreplit_layer@@@Z + ??$_Emplace_back@Ustarcoder_layer@@@?$_Uninitialized_backout_al@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAAX$$QEAUstarcoder_layer@@@Z + ??$_Emplace_back@Utoken_score@llama_vocab@@@?$_Uninitialized_backout_al@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAAX$$QEAUtoken_score@llama_vocab@@@Z + ??$_Emplace_back@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Uninitialized_backout_al@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@QEAAX$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Emplace_back@V?$sub_match@PEBD@std@@@?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAAX$$QEAV?$sub_match@PEBD@1@@Z + ??$_Emplace_back@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAAX$$QEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@@Z + ??$_Emplace_back@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@?$_Uninitialized_backout_al@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@QEAAX$$QEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@1@@Z + ??$_Emplace_back@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@?$_Uninitialized_backout_al@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@QEAAX$$QEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@1@@Z + ??$_Emplace_back@Vthread@std@@@?$_Uninitialized_backout_al@V?$allocator@Vthread@std@@@std@@@std@@QEAAX$$QEAVthread@1@@Z + ??$_Emplace_back@_J@?$_Uninitialized_backout_al@V?$allocator@_J@std@@@std@@QEAAX$$QEA_J@Z + ??$_Emplace_back_with_unused_capacity@AEAPEAUfalcon_file_loader@@@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEAA?A_TAEAPEAUfalcon_file_loader@@@Z + ??$_Emplace_back_with_unused_capacity@AEAPEAUllama_file_loader@@@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEAA?A_TAEAPEAUllama_file_loader@@@Z + ??$_Emplace_back_with_unused_capacity@AEAUllama_sp_symbol@@@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEAA?A_TAEAUllama_sp_symbol@@@Z + ??$_Emplace_back_with_unused_capacity@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEAA?A_TAEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Emplace_back_with_unused_capacity@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAPEAUggml_tensor@@@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAA?A_TAEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEAPEAUggml_tensor@@@Z + ??$_Emplace_back_with_unused_capacity@AEBH@?$vector@HV?$allocator@H@std@@@std@@AEAA?A_TAEBH@Z + ??$_Emplace_back_with_unused_capacity@AEBM@?$vector@MV?$allocator@M@std@@@std@@AEAA?A_TAEBM@Z + ??$_Emplace_back_with_unused_capacity@AEBN@?$vector@NV?$allocator@N@std@@@std@@AEAA?A_TAEBN@Z + ??$_Emplace_back_with_unused_capacity@AEBQEAUggml_tensor@@@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAA?A_TAEBQEAUggml_tensor@@@Z + ??$_Emplace_back_with_unused_capacity@AEBUllama_load_tensor_shard@@@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEAA?A_TAEBUllama_load_tensor_shard@@@Z + ??$_Emplace_back_with_unused_capacity@AEBUllama_sp_bigram@@@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEAA?A_TAEBUllama_sp_bigram@@@Z + ??$_Emplace_back_with_unused_capacity@AEBUllama_token_data@@@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAA?A_TAEBUllama_token_data@@@Z + ??$_Emplace_back_with_unused_capacity@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAA?A_TAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Emplace_back_with_unused_capacity@H@?$vector@HV?$allocator@H@std@@@std@@AEAA?A_T$$QEAH@Z + ??$_Emplace_back_with_unused_capacity@N@?$vector@NV?$allocator@N@std@@@std@@AEAA?A_T$$QEAN@Z + ??$_Emplace_back_with_unused_capacity@U?$pair@NH@std@@@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAA?A_T$$QEAU?$pair@NH@1@@Z + ??$_Emplace_back_with_unused_capacity@Ullama_token_data@@@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAA?A_T$$QEAUllama_token_data@@@Z + ??$_Emplace_back_with_unused_capacity@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAA?A_T$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Emplace_back_with_unused_capacity@Vthread@std@@@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAA?A_T$$QEAVthread@1@@Z + ??$_Emplace_hint@AEBU?$pair@$$CBW4e_model@@_K@std@@@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@1@QEAU21@AEBU?$pair@$$CBW4e_model@@_K@1@@Z + ??$_Emplace_hint@AEBU?$pair@$$CBW4falcon_e_model@@_K@std@@@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@1@QEAU21@AEBU?$pair@$$CBW4falcon_e_model@@_K@1@@Z + ??$_Emplace_reallocate@AEAPEAUfalcon_file_loader@@@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@QEAAPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@1@QEAV21@AEAPEAUfalcon_file_loader@@@Z + ??$_Emplace_reallocate@AEAPEAUllama_file_loader@@@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@QEAAPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@1@QEAV21@AEAPEAUllama_file_loader@@@Z + ??$_Emplace_reallocate@AEAUllama_sp_symbol@@@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAAPEAUllama_sp_symbol@@QEAU2@AEAU2@@Z + ??$_Emplace_reallocate@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAAPEAUllama_load_tensor@@QEAU2@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Emplace_reallocate@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAPEAUggml_tensor@@@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAAPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@QEAU21@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEAPEAUggml_tensor@@@Z + ??$_Emplace_reallocate@AEBH@?$vector@HV?$allocator@H@std@@@std@@QEAAPEAHQEAHAEBH@Z + ??$_Emplace_reallocate@AEBM@?$vector@MV?$allocator@M@std@@@std@@QEAAPEAMQEAMAEBM@Z + ??$_Emplace_reallocate@AEBN@?$vector@NV?$allocator@N@std@@@std@@QEAAPEANQEANAEBN@Z + ??$_Emplace_reallocate@AEBQEAUggml_tensor@@@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAAPEAPEAUggml_tensor@@QEAPEAU2@AEBQEAU2@@Z + ??$_Emplace_reallocate@AEBUllama_load_tensor_shard@@@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAAPEAUllama_load_tensor_shard@@QEAU2@AEBU2@@Z + ??$_Emplace_reallocate@AEBUllama_sp_bigram@@@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAAPEAUllama_sp_bigram@@QEAU2@AEBU2@@Z + ??$_Emplace_reallocate@AEBUllama_token_data@@@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAAPEAUllama_token_data@@QEAU2@AEBU2@@Z + ??$_Emplace_reallocate@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@QEAV21@AEBV21@@Z + ??$_Emplace_reallocate@H@?$vector@HV?$allocator@H@std@@@std@@QEAAPEAHQEAH$$QEAH@Z + ??$_Emplace_reallocate@N@?$vector@NV?$allocator@N@std@@@std@@QEAAPEANQEAN$$QEAN@Z + ??$_Emplace_reallocate@U?$pair@NH@std@@@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAAPEAU?$pair@NH@1@QEAU21@$$QEAU21@@Z + ??$_Emplace_reallocate@Ullama_token_data@@@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAAPEAUllama_token_data@@QEAU2@$$QEAU2@@Z + ??$_Emplace_reallocate@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@QEAV21@$$QEAV21@@Z + ??$_Emplace_reallocate@Vthread@std@@@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAAPEAVthread@1@QEAV21@$$QEAV21@@Z + ??$_Erase_head@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@@Z + ??$_Erase_head@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@@Z + ??$_Erase_head@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@@Z + ??$_Erase_head@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@@Z + ??$_Erase_head@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@@Z + ??$_Erase_tree@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@@Z + ??$_Erase_tree@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@@Z + ??$_Erase_tree@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@@Z + ??$_Erase_tree@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@1@@Z + ??$_Erase_tree@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@QEAAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@1@@Z + ??$_Eval@V?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@std@@@?$discrete_distribution@H@std@@AEBAHAEAV?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@1@AEBUparam_type@01@@Z + ??$_Fgetc@D@std@@YA_NAEADPEAU_iobuf@@@Z + ??$_Fill_vbool@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@_N@std@@YAXV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@0AEB_N@Z + ??$_Fill_zero_memset@PEAH@std@@YAXPEAH_K@Z + ??$_Fill_zero_memset@PEAI@std@@YAXPEAI_K@Z + ??$_Fill_zero_memset@PEAM@std@@YAXPEAM_K@Z + ??$_Fill_zero_memset@PEAN@std@@YAXPEAN_K@Z + ??$_Fill_zero_memset@PEA_J@std@@YAXPEA_J_K@Z + ??$_Find@E@_Bitmap@std@@QEBA_NE@Z + ??$_Find@H@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@AEBAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@1@AEBH_K@Z + ??$_Find@H@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@AEBAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@AEBH@Z + ??$_Find@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@AEBAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@_K@Z + ??$_Find@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@AEBAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@_K@Z + ??$_Find@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@AEBAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@_K@Z + ??$_Find@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@AEBAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Find@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@AEBAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Find_hint@W4e_model@@@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEBA?AU?$_Tree_find_hint_result@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@QEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@1@AEBW4e_model@@@Z + ??$_Find_hint@W4falcon_e_model@@@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEBA?AU?$_Tree_find_hint_result@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@QEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@1@AEBW4falcon_e_model@@@Z + ??$_Find_last@H@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEBA?AU?$_Hash_find_last_result@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@1@AEBH_K@Z + ??$_Find_last@H@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEBA?AU?$_Hash_find_last_result@PEAU?$_List_node@HPEAX@std@@@1@AEBH_K@Z + ??$_Find_last@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBA?AU?$_Hash_find_last_result@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@_K@Z + ??$_Find_last@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBA?AU?$_Hash_find_last_result@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@_K@Z + ??$_Find_last@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEBA?AU?$_Hash_find_last_result@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@_K@Z + ??$_Find_last@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEBA?AU?$_Hash_find_last_result@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@_K@Z + ??$_Find_lower_bound@H@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEBA?AU?$_Tree_find_result@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@AEBH@Z + ??$_Find_lower_bound@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBA?AU?$_Tree_find_result@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Find_lower_bound@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBA?AU?$_Tree_find_result@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Find_lower_bound@W4e_model@@@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEBA?AU?$_Tree_find_result@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@AEBW4e_model@@@Z + ??$_Find_lower_bound@W4falcon_e_model@@@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEBA?AU?$_Tree_find_result@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@AEBW4falcon_e_model@@@Z + ??$_Find_unchecked1@PEBDE@std@@YAPEBDPEBDQEBDAEBEU?$integral_constant@_N$00@0@@Z + ??$_Find_unchecked1@PEBDW4_Meta_type@std@@@std@@YAPEBDPEBDQEBDAEBW4_Meta_type@0@U?$integral_constant@_N$0A@@0@@Z + ??$_Find_unchecked1@PEBHH@std@@YAPEBHPEBHQEBHAEBHU?$integral_constant@_N$0A@@0@@Z + ??$_Find_unchecked@PEBDE@std@@YAPEBDQEBD0AEBE@Z + ??$_Find_unchecked@PEBDW4_Meta_type@std@@@std@@YAPEBDQEBD0AEBW4_Meta_type@0@@Z + ??$_Find_unchecked@PEBHH@std@@YAPEBHQEBH0AEBH@Z + ??$_Fnv1a_append_value@H@std@@YA_K_KAEBH@Z + ??$_Format1@V?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEBA?AV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@V21@PEBD1W4match_flag_type@regex_constants@1@@Z + ??$_Format_default@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@PEBDV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@YA?AV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEBV?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@0@V10@PEBD2W4match_flag_type@regex_constants@0@@Z + ??$_Format_sed@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@PEBDV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@YA?AV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEBV?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@0@V10@PEBD2W4match_flag_type@regex_constants@0@@Z + ??$_Fputc@D@std@@YA_NDPEAU_iobuf@@@Z + ??$_Free_non_head@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@?$_List_node@HPEAX@std@@SAXAEAV?$allocator@U?$_List_node@HPEAX@std@@@1@PEAU01@@Z + ??$_Free_non_head@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Free_non_head@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Free_non_head@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Free_non_head@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Free_non_head@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@?$_List_node@HPEAX@std@@SAXAEAV?$allocator@U?$_List_node@HPEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode0@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@?$_List_node@HPEAX@std@@SAXAEAV?$allocator@U?$_List_node@HPEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Freenode@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@PEAU01@@Z + ??$_Get@D@_Cl_names@std@@QEBAPEBDXZ + ??$_Get_size_of_n@$00@std@@YA_K_K@Z + ??$_Get_size_of_n@$01@std@@YA_K_K@Z + ??$_Get_size_of_n@$03@std@@YA_K_K@Z + ??$_Get_size_of_n@$07@std@@YA_K_K@Z + ??$_Get_size_of_n@$0BA@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0BI@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0CA@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0CI@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0DA@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0DI@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0EA@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0EI@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0FA@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0GA@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0HA@@std@@YA_K_K@Z + ??$_Get_size_of_n@$0M@@std@@YA_K_K@Z + ??$_Get_unwrapped@AEAPEAD@std@@YA?A_TAEAPEAD@Z + ??$_Get_unwrapped@AEAPEAI@std@@YA?A_TAEAPEAI@Z + ??$_Get_unwrapped@AEAPEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YA?A_TAEAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@@Z + ??$_Get_unwrapped@AEAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA?A_TAEAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAPEAUllama_token_data@@@std@@YA?A_TAEAPEAUllama_token_data@@@Z + ??$_Get_unwrapped@AEAPEBD@std@@YA?A_TAEAPEBD@Z + ??$_Get_unwrapped@AEAPEBH@std@@YA?A_TAEAPEBH@Z + ??$_Get_unwrapped@AEAPEBU?$pair@$$CBW4e_model@@_K@std@@@std@@YA?A_TAEAPEBU?$pair@$$CBW4e_model@@_K@0@@Z + ??$_Get_unwrapped@AEAPEBU?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@YA?A_TAEAPEBU?$pair@$$CBW4falcon_e_model@@_K@0@@Z + ??$_Get_unwrapped@AEAV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@@std@@YA?A_TAEAV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YA?A_TAEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@@Z + ??$_Get_unwrapped@AEAV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@YA?A_TAEAV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@YA?A_TAEAV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@std@@@std@@YA?A_TAEAV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAD@std@@YA?A_TAEBQEAD@Z + ??$_Get_unwrapped@AEBQEAH@std@@YA?A_TAEBQEAH@Z + ??$_Get_unwrapped@AEBQEAI@std@@YA?A_TAEBQEAI@Z + ??$_Get_unwrapped@AEBQEAM@std@@YA?A_TAEBQEAM@Z + ??$_Get_unwrapped@AEBQEAN@std@@YA?A_TAEBQEAN@Z + ??$_Get_unwrapped@AEBQEAPEAUggml_tensor@@@std@@YA?A_TAEBQEAPEAUggml_tensor@@@Z + ??$_Get_unwrapped@AEBQEAU?$pair@NH@std@@@std@@YA?A_TAEBQEAU?$pair@NH@0@@Z + ??$_Get_unwrapped@AEBQEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@YA?A_TAEBQEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@@Z + ??$_Get_unwrapped@AEBQEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YA?A_TAEBQEAU_Grp_t@?$_Tgt_state_t@PEBD@0@@Z + ??$_Get_unwrapped@AEBQEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA?A_TAEBQEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAU_Loop_vals_t@std@@@std@@YA?A_TAEBQEAU_Loop_vals_t@0@@Z + ??$_Get_unwrapped@AEBQEAUdollyv2_layer@@@std@@YA?A_TAEBQEAUdollyv2_layer@@@Z + ??$_Get_unwrapped@AEBQEAUfalcon_layer@@@std@@YA?A_TAEBQEAUfalcon_layer@@@Z + ??$_Get_unwrapped@AEBQEAUgpt2_layer@@@std@@YA?A_TAEBQEAUgpt2_layer@@@Z + ??$_Get_unwrapped@AEBQEAUgpt_neox_layer@@@std@@YA?A_TAEBQEAUgpt_neox_layer@@@Z + ??$_Get_unwrapped@AEBQEAUgptj_layer@@@std@@YA?A_TAEBQEAUgptj_layer@@@Z + ??$_Get_unwrapped@AEBQEAUllama_layer@@@std@@YA?A_TAEBQEAUllama_layer@@@Z + ??$_Get_unwrapped@AEBQEAUllama_load_tensor@@@std@@YA?A_TAEBQEAUllama_load_tensor@@@Z + ??$_Get_unwrapped@AEBQEAUllama_load_tensor_shard@@@std@@YA?A_TAEBQEAUllama_load_tensor_shard@@@Z + ??$_Get_unwrapped@AEBQEAUllama_sp_bigram@@@std@@YA?A_TAEBQEAUllama_sp_bigram@@@Z + ??$_Get_unwrapped@AEBQEAUllama_sp_symbol@@@std@@YA?A_TAEBQEAUllama_sp_symbol@@@Z + ??$_Get_unwrapped@AEBQEAUllama_token_data@@@std@@YA?A_TAEBQEAUllama_token_data@@@Z + ??$_Get_unwrapped@AEBQEAUmpt_layer@@@std@@YA?A_TAEBQEAUmpt_layer@@@Z + ??$_Get_unwrapped@AEBQEAUreplit_layer@@@std@@YA?A_TAEBQEAUreplit_layer@@@Z + ??$_Get_unwrapped@AEBQEAUstarcoder_layer@@@std@@YA?A_TAEBQEAUstarcoder_layer@@@Z + ??$_Get_unwrapped@AEBQEAUtoken_score@llama_vocab@@@std@@YA?A_TAEBQEAUtoken_score@llama_vocab@@@Z + ??$_Get_unwrapped@AEBQEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YA?A_TAEBQEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@YA?A_TAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@YA?A_TAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@YA?A_TAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@YA?A_TAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@YA?A_TAEBQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YA?A_TAEBQEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$sub_match@PEBD@std@@@std@@YA?A_TAEBQEAV?$sub_match@PEBD@0@@Z + ??$_Get_unwrapped@AEBQEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA?A_TAEBQEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@YA?A_TAEBQEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@YA?A_TAEBQEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@0@@Z + ??$_Get_unwrapped@AEBQEAVthread@std@@@std@@YA?A_TAEBQEAVthread@0@@Z + ??$_Get_unwrapped@AEBQEA_J@std@@YA?A_TAEBQEA_J@Z + ??$_Get_unwrapped@AEBQEBD@std@@YA?A_TAEBQEBD@Z + ??$_Get_unwrapped@AEBQEBH@std@@YA?A_TAEBQEBH@Z + ??$_Get_unwrapped@AEBQEBI@std@@YA?A_TAEBQEBI@Z + ??$_Get_unwrapped@AEBV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YA?A_TAEBV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@@Z + ??$_Get_unwrapped@AEBV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@YA?A_TAEBV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@YA?A_TAEBV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@@std@@YA?A_TAEBV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@@std@@YA?A_TAEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@0@@Z + ??$_Get_unwrapped@AEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@std@@@std@@YA?A_TAEBV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@0@@Z + ??$_Get_unwrapped_n@AEAPEAH_J@std@@YA?A_TAEAPEAH_J@Z + ??$_Get_unwrapped_n@AEAPEAUllama_token_data@@_J@std@@YA?A_TAEAPEAUllama_token_data@@_J@Z + ??$_Get_unwrapped_n@AEAV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@_J@std@@YA?A_TAEAV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@_J@Z + ??$_Get_unwrapped_n@AEAV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@_J@std@@YA?A_TAEAV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@_J@Z + ??$_Get_unwrapped_n@AEBQEBD_J@std@@YA?A_TAEBQEBD_J@Z + ??$_Get_unwrapped_n@AEBQEBI_J@std@@YA?A_TAEBQEBI_J@Z + ??$_Get_unwrapped_unverified@AEAV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YA?A_TAEAV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@@Z + ??$_Getvals@_W@?$time_get@DV?$istreambuf_iterator@DU?$char_traits@D@std@@@std@@@std@@IEAAX_WAEBV_Locinfo@1@@Z + ??$_Getvals@_W@?$time_get@_WV?$istreambuf_iterator@_WU?$char_traits@_W@std@@@std@@@std@@IEAAX_WAEBV_Locinfo@1@@Z + ??$_Hash_array_representation@D@std@@YA_KQEBD_K@Z + ??$_Hash_representation@H@std@@YA_KAEBH@Z + ??$_Idl_distance@PEBDPEBD@std@@YA?A_PAEBQEBD0@Z + ??$_Idl_distance@PEBIPEBI@std@@YA?A_PAEBQEBI0@Z + ??$_Idl_distance@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@V12@@std@@YA?A_PAEBV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@0@Z + ??$_Idl_distance@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@PEBH@std@@YA?A_PAEBQEBH0@Z + ??$_Idl_distance@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@std@@PEAUllama_token_data@@@std@@YA?A_PAEBQEAUllama_token_data@@0@Z + ??$_Immortalize_memcpy_image@V_Generic_error_category@std@@@std@@YAAEBV_Generic_error_category@0@XZ + ??$_Insert@PEBD@?$_Buf@D@std@@QEAAXPEBD0@Z + ??$_Integral_to_string@DH@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@H@Z + ??$_Is_all_bits_zero@H@std@@YA_NAEBH@Z + ??$_Is_all_bits_zero@I@std@@YA_NAEBI@Z + ??$_Is_all_bits_zero@M@std@@YA_NAEBM@Z + ??$_Is_all_bits_zero@N@std@@YA_NAEBN@Z + ??$_Is_all_bits_zero@_J@std@@YA_NAEB_J@Z + ??$_Kfn@$$CBHH@?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@SAAEBHAEBU?$pair@$$CBHH@1@@Z + ??$_Kfn@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@SAAEBHAEBU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@@Z + ??$_Kfn@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@SAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@@Z + ??$_Kfn@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@SAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@@Z + ??$_Kfn@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@SAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$_Kfn@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@SAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$_Kfn@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@SAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@1@@Z + ??$_Kfn@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@SAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@@Z + ??$_Kfn@$$CBW4e_model@@_K@?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@SAAEBW4e_model@@AEBU?$pair@$$CBW4e_model@@_K@1@@Z + ??$_Kfn@$$CBW4falcon_e_model@@_K@?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@SAAEBW4falcon_e_model@@AEBU?$pair@$$CBW4falcon_e_model@@_K@1@@Z + ??$_LStrcoll@D@std@@YAHPEBD000PEBU_Collvec@@@Z + ??$_LStrxfrm@D@std@@YA_KPEAD0PEBD1PEBU_Collvec@@@Z + ??$_Lookup_coll@PEBDD@std@@YAPEBDPEBD0PEBU?$_Sequence@D@0@@Z + ??$_Lookup_coll@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@D@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@0PEBU?$_Sequence@D@0@@Z + ??$_Lookup_equiv@DV?$regex_traits@D@std@@@std@@YA_NEPEBU?$_Sequence@D@0@AEBV?$regex_traits@D@0@@Z + ??$_Lookup_range@D@std@@YA_NIPEBU?$_Buf@D@0@@Z + ??$_Lower_bound_duplicate@H@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEBA_NQEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@AEBH@Z + ??$_Lower_bound_duplicate@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBA_NQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Lower_bound_duplicate@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBA_NQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Lower_bound_duplicate@W4e_model@@@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEBA_NQEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@1@AEBW4e_model@@@Z + ??$_Lower_bound_duplicate@W4falcon_e_model@@@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEBA_NQEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@1@AEBW4falcon_e_model@@@Z + ??$_Maklocstr@D@std@@YAPEADPEBDPEADAEBU_Cvtvec@@@Z + ??$_Maklocstr@_W@std@@YAPEA_WPEBDPEA_WAEBU_Cvtvec@@@Z + ??$_Mark@E@_Bitmap@std@@QEAAXE@Z + ??$_Mark@I@_Bitmap@std@@QEAAXI@Z + ??$_Match@V?$allocator@V?$sub_match@PEBD@std@@@std@@@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@QEAA_NPEAV?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@1@_N@Z + ??$_Match@V?$allocator@V?$sub_match@PEBD@std@@@std@@@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@QEAA_NPEBDPEAV?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@1@_N@Z + ??$_Match@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@QEAA_NPEAV?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@1@_N@Z + ??$_Match@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@QEAA_NV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@1@PEAV?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@1@_N@Z + ??$_Memcmp_ranges@PEBIPEBI@std@@YAHPEBI00@Z + ??$_Move_backward_unchecked@PEAHPEAH@std@@YAPEAHPEAH00@Z + ??$_Move_backward_unchecked@PEAUllama_token_data@@PEAU1@@std@@YAPEAUllama_token_data@@PEAU1@00@Z + ??$_Move_backward_unchecked@PEA_KPEA_K@std@@YAPEA_KPEA_K00@Z + ??$_Move_unchecked@PEAIPEAI@std@@YAPEAIPEAI00@Z + ??$_Next_iter@PEAUllama_token_data@@@std@@YAPEAUllama_token_data@@PEAU1@@Z + ??$_Next_iter@PEA_K@std@@YAPEA_KPEA_K@Z + ??$_Next_iter@PEBN@std@@YAPEBNPEBN@Z + ??$_Pass_fn@U?$_Cmp_icase@V?$_Regex_traits@D@std@@@std@@$0A@@std@@YA?AU?$_Cmp_icase@V?$_Regex_traits@D@std@@@0@U10@@Z + ??$_Pass_fn@Ucomparator@llama_sp_bigram@@$0A@@std@@YA?AUcomparator@llama_sp_bigram@@U12@@Z + ??$_Pocca@V?$allocator@D@std@@@std@@YAXAEAV?$allocator@D@0@AEBV10@@Z + ??$_Pocca@V?$allocator@I@std@@@std@@YAXAEAV?$allocator@I@0@AEBV10@@Z + ??$_Pocca@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@YAXAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@0@AEBV10@@Z + ??$_Pocca@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@YAXAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@AEBV10@@Z + ??$_Pocma@V?$allocator@D@std@@@std@@YAXAEAV?$allocator@D@0@0@Z + ??$_Pocma@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@YAXAEAV?$allocator@U?$_List_node@HPEAX@std@@@0@0@Z + ??$_Pocma@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@YAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@0@0@Z + ??$_Pocma@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@YAXAEAV?$allocator@Utoken_score@llama_vocab@@@0@0@Z + ??$_Pocma@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@YAXAEAV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@0@0@Z + ??$_Pocma@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@YAXAEAV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@0@0@Z + ??$_Pop_heap_hole_by_index@PEAUllama_sp_bigram@@U1@Ucomparator@1@@std@@YAXPEAUllama_sp_bigram@@_J1$$QEAU1@Ucomparator@1@@Z + ??$_Pop_heap_hole_unchecked@PEAUllama_sp_bigram@@U1@Ucomparator@1@@std@@YAXPEAUllama_sp_bigram@@00$$QEAU1@Ucomparator@1@@Z + ??$_Pop_heap_unchecked@PEAUllama_sp_bigram@@Ucomparator@1@@std@@YAXPEAUllama_sp_bigram@@0Ucomparator@1@@Z + ??$_Prev_iter@PEAUllama_token_data@@@std@@YAPEAUllama_token_data@@PEAU1@@Z + ??$_Prev_iter@PEA_K@std@@YAPEA_KPEA_K@Z + ??$_Prev_iter@PEBD@std@@YAPEBDPEBD@Z + ??$_Prev_iter@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@@Z + ??$_Prev_iter@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@@std@@YA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@V10@@Z + ??$_Push_heap_by_index@PEAUllama_sp_bigram@@U1@Ucomparator@1@@std@@YAXPEAUllama_sp_bigram@@_J1$$QEAU1@Ucomparator@1@@Z + ??$_Range_construct_or_tidy@PEAD@?$vector@DV?$allocator@D@std@@@std@@AEAAXPEAD0Uforward_iterator_tag@1@@Z + ??$_Range_construct_or_tidy@PEAM@?$vector@NV?$allocator@N@std@@@std@@AEAAXPEAM0Uforward_iterator_tag@1@@Z + ??$_Range_construct_or_tidy@PEAN@?$vector@NV?$allocator@N@std@@@std@@AEAAXPEAN0Uforward_iterator_tag@1@@Z + ??$_Range_construct_or_tidy@PEBD@?$vector@DV?$allocator@D@std@@@std@@AEAAXPEBD0Uforward_iterator_tag@1@@Z + ??$_Range_construct_or_tidy@PEBH@?$vector@HV?$allocator@H@std@@@std@@AEAAXPEBH0Uforward_iterator_tag@1@@Z + ??$_Range_construct_or_tidy@PEBI@?$vector@IV?$allocator@I@std@@@std@@AEAAXPEBI0Uforward_iterator_tag@1@@Z + ??$_Range_construct_or_tidy@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@?$vector@HV?$allocator@H@std@@@std@@AEAAXV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@1@0Uforward_iterator_tag@1@@Z + ??$_Reallocate_for@V@@D@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAAEAV01@_KV@@D@Z + ??$_Reallocate_for@V@@PEBD@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAAEAV01@_KV@@PEBD@Z + ??$_Reallocate_grow_by@V@@PEB_W_K@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAAEAV01@_KV@@PEB_W_K@Z + ??$_Reallocate_grow_by@V@@_W@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAAEAV01@_KV@@_W@Z + ??$_Reallocate_grow_by@V@@D@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAAEAV01@_KV@@D@Z + ??$_Reallocate_grow_by@V@@PEBD_K@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAAEAV01@_KV@@PEBD_K@Z + ??$_Reallocate_grow_by@V@@_K_W@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAAEAV01@_KV@@_K_W@Z + ??$_Reallocate_grow_by@V@@_KD@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAAEAV01@_KV@@_KD@Z + ??$_Reallocate_grow_by@V@@_KPEBD_K@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAAEAV01@_KV@@_KPEBD2@Z + ??$_Refancy@PEAD$0A@@std@@YAPEADPEAD@Z + ??$_Refancy@PEAI$0A@@std@@YAPEAIPEAI@Z + ??$_Refancy@PEAN$0A@@std@@YAPEANPEAN@Z + ??$_Refancy@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@$0A@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@PEAU120@@Z + ??$_Refancy@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@$0A@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@PEAU120@@Z + ??$_Refancy@PEBD$0A@@std@@YAPEBDPEBD@Z + ??$_Regex_replace1@V?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEBDV?$regex_traits@D@2@DU?$char_traits@D@2@V?$allocator@D@2@@std@@YA?AV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@V10@PEBD1AEBV?$basic_regex@DV?$regex_traits@D@std@@@0@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@W4match_flag_type@regex_constants@0@@Z + ??$_Regex_search2@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@DV?$regex_traits@D@2@PEBD@std@@YA_NPEBD0PEAV?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@0@AEBV?$basic_regex@DV?$regex_traits@D@std@@@0@W4match_flag_type@regex_constants@0@0@Z + ??$_Regex_search2@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@DV?$regex_traits@D@2@V12@@std@@YA_NV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@0PEAV?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@0@AEBV?$basic_regex@DV?$regex_traits@D@std@@@0@W4match_flag_type@regex_constants@0@0@Z + ??$_Reset@PEBD@?$basic_regex@DV?$regex_traits@D@std@@@std@@AEAAXPEBD0W4syntax_option_type@regex_constants@1@Uforward_iterator_tag@1@@Z + ??$_Resize@I@?$vector@IV?$allocator@I@std@@@std@@AEAAX_KAEBI@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@DV?$allocator@D@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@IV?$allocator@I@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@MV?$allocator@M@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@NV?$allocator@N@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@U_Value_init_tag@std@@@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize@_J@?$vector@_JV?$allocator@_J@std@@@std@@AEAAX_KAEB_J@Z + ??$_Resize_reallocate@I@?$vector@IV?$allocator@I@std@@@std@@AEAAX_KAEBI@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@DV?$allocator@D@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@IV?$allocator@I@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@MV?$allocator@M@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@NV?$allocator@N@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@U_Value_init_tag@std@@@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAX_KAEBU_Value_init_tag@1@@Z + ??$_Resize_reallocate@_J@?$vector@_JV?$allocator@_J@std@@@std@@AEAAX_KAEB_J@Z + ??$_Seek_wrapped@PEADAEAPEAD@std@@YAXAEAPEAD0@Z + ??$_Seek_wrapped@PEAHPEAH@std@@YAXAEAPEAH$$QEAPEAH@Z + ??$_Seek_wrapped@PEAUllama_token_data@@AEAPEAU1@@std@@YAXAEAPEAUllama_token_data@@0@Z + ??$_Seek_wrapped@PEAUllama_token_data@@PEAU1@@std@@YAXAEAPEAUllama_token_data@@$$QEAPEAU1@@Z + ??$_Seek_wrapped@PEBDPEBD@std@@YAXAEAPEBD$$QEAPEBD@Z + ??$_Seek_wrapped@PEBHPEBH@std@@YAXAEAPEBH$$QEAPEBH@Z + ??$_Seek_wrapped@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@PEBD@std@@YAXAEAV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@$$QEAPEBD@Z + ??$_Seek_wrapped@V?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@AEAPEAD@std@@YAXAEAV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@AEAPEAD@Z + ??$_Seek_wrapped@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@V12@@std@@YAXAEAV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@$$QEAV10@@Z + ??$_Seek_wrapped@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@AEAPEBN@std@@YAXAEAV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@AEAPEBN@Z + ??$_Seek_wrapped@V?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V12@@std@@YAXAEAV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@$$QEAV10@@Z + ??$_Swap_adl@PEAU?$_List_node@HPEAX@std@@@std@@YAXAEAPEAU?$_List_node@HPEAX@0@0@Z + ??$_Swap_adl@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@0@Z + ??$_Swap_adl@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YAXAEAPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@0@Z + ??$_Swap_adl@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@YAXAEAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@0@Z + ??$_Swap_adl@PEAV_Node_base@std@@@std@@YAXAEAPEAV_Node_base@0@0@Z + ??$_To_address@PEAD@std@@YA?A_PAEBQEAD@Z + ??$_To_address@PEAE@std@@YA?A_PAEBQEAE@Z + ??$_To_address@PEAH@std@@YA?A_PAEBQEAH@Z + ??$_To_address@PEAI@std@@YA?A_PAEBQEAI@Z + ??$_To_address@PEAM@std@@YA?A_PAEBQEAM@Z + ??$_To_address@PEAN@std@@YA?A_PAEBQEAN@Z + ??$_To_address@PEAPEAUggml_tensor@@@std@@YA?A_PAEBQEAPEAUggml_tensor@@@Z + ??$_To_address@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YA?A_PAEBQEAU_Grp_t@?$_Tgt_state_t@PEBD@0@@Z + ??$_To_address@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA?A_PAEBQEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@@Z + ??$_To_address@PEAU_Loop_vals_t@std@@@std@@YA?A_PAEBQEAU_Loop_vals_t@0@@Z + ??$_To_address@PEAUdollyv2_layer@@@std@@YA?A_PAEBQEAUdollyv2_layer@@@Z + ??$_To_address@PEAUfalcon_layer@@@std@@YA?A_PAEBQEAUfalcon_layer@@@Z + ??$_To_address@PEAUgpt2_layer@@@std@@YA?A_PAEBQEAUgpt2_layer@@@Z + ??$_To_address@PEAUgpt_neox_layer@@@std@@YA?A_PAEBQEAUgpt_neox_layer@@@Z + ??$_To_address@PEAUgptj_layer@@@std@@YA?A_PAEBQEAUgptj_layer@@@Z + ??$_To_address@PEAUllama_layer@@@std@@YA?A_PAEBQEAUllama_layer@@@Z + ??$_To_address@PEAUllama_sp_bigram@@@std@@YA?A_PAEBQEAUllama_sp_bigram@@@Z + ??$_To_address@PEAUllama_sp_symbol@@@std@@YA?A_PAEBQEAUllama_sp_symbol@@@Z + ??$_To_address@PEAUllama_token_data@@@std@@YA?A_PAEBQEAUllama_token_data@@@Z + ??$_To_address@PEAUmpt_layer@@@std@@YA?A_PAEBQEAUmpt_layer@@@Z + ??$_To_address@PEAUreplit_layer@@@std@@YA?A_PAEBQEAUreplit_layer@@@Z + ??$_To_address@PEAUstarcoder_layer@@@std@@YA?A_PAEBQEAUstarcoder_layer@@@Z + ??$_To_address@PEA_J@std@@YA?A_PAEBQEA_J@Z + ??$_To_address@PEA_K@std@@YA?A_PAEBQEA_K@Z + ??$_To_address@PEBD@std@@YA?A_PAEBQEBD@Z + ??$_To_address@PEBH@std@@YA?A_PAEBQEBH@Z + ??$_To_address@PEBI@std@@YA?A_PAEBQEBI@Z + ??$_Traits_compare@U?$char_traits@D@std@@@std@@YAHQEBD_K01@Z + ??$_Traits_equal@U?$char_traits@D@std@@@std@@YA_NQEBD_K01@Z + ??$_Traits_find@U?$char_traits@D@std@@@std@@YA_KQEBD_K101@Z + ??$_Traits_rfind@U?$char_traits@D@std@@@std@@YA_KQEBD_K101@Z + ??$_Try_emplace@AEBH$$V@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEAA?AU?$pair@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@_N@1@AEBH@Z + ??$_Try_emplace@AEBH$$V@?$map@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@@std@@AEAA?AU?$pair@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@_N@1@AEBH@Z + ??$_Try_emplace@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$V@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAA?AU?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Try_emplace@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$V@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAA?AU?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Try_emplace@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$V@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEAA?AU?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@_N@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Try_emplace@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$V@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEAA?AU?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@_N@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Try_emplace@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$V@?$map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@AEAA?AU?$pair@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@_N@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Try_emplace@H$$V@?$map@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@@std@@AEAA?AU?$pair@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@_N@1@$$QEAH@Z + ??$_Try_emplace@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$V@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAA?AU?$pair@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N@1@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Try_emplace@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@$$V@?$map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAA?AU?$pair@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@_N@1@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$_Tuple_get@$0A@$$QEAH@std@@YA$$QEA_P$$QEAV?$tuple@$$QEAH@0@@Z + ??$_Tuple_get@$0A@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YA$$QEA_P$$QEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@@Z + ??$_Tuple_get@$0A@AEBH@std@@YA$$QEA_P$$QEAV?$tuple@AEBH@0@@Z + ??$_Tuple_get@$0A@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YA$$QEA_P$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@@Z + ??$_UIntegral_to_buff@DI@std@@YAPEADPEADI@Z + ??$_UIntegral_to_string@DI@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@I@Z + ??$_Ucopy@PEAD@?$vector@DV?$allocator@D@std@@@std@@AEAAPEADPEAD00@Z + ??$_Ucopy@PEAH@?$vector@HV?$allocator@H@std@@@std@@AEAAPEAHPEAH00@Z + ??$_Ucopy@PEAI@?$vector@IV?$allocator@I@std@@@std@@AEAAPEAIPEAI00@Z + ??$_Ucopy@PEAM@?$vector@NV?$allocator@N@std@@@std@@AEAAPEANPEAM0PEAN@Z + ??$_Ucopy@PEAN@?$vector@NV?$allocator@N@std@@@std@@AEAAPEANPEAN00@Z + ??$_Ucopy@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAPEAU_Grp_t@?$_Tgt_state_t@PEBD@1@PEAU231@00@Z + ??$_Ucopy@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@PEAU231@00@Z + ??$_Ucopy@PEBD@?$vector@DV?$allocator@D@std@@@std@@AEAAPEADPEBD0PEAD@Z + ??$_Ucopy@PEBH@?$vector@HV?$allocator@H@std@@@std@@AEAAPEAHPEBH0PEAH@Z + ??$_Ucopy@PEBI@?$vector@IV?$allocator@I@std@@@std@@AEAAPEAIPEBI0PEAI@Z + ??$_Ucopy@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@?$vector@HV?$allocator@H@std@@@std@@AEAAPEAHV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@1@0PEAH@Z + ??$_Unfancy@$$CBD@std@@YAPEBDPEBD@Z + ??$_Unfancy@D@std@@YAPEADPEAD@Z + ??$_Unfancy@E@std@@YAPEAEPEAE@Z + ??$_Unfancy@H@std@@YAPEAHPEAH@Z + ??$_Unfancy@I@std@@YAPEAIPEAI@Z + ??$_Unfancy@M@std@@YAPEAMPEAM@Z + ??$_Unfancy@N@std@@YAPEANPEAN@Z + ??$_Unfancy@PEAUggml_tensor@@@std@@YAPEAPEAUggml_tensor@@PEAPEAU1@@Z + ??$_Unfancy@U?$pair@NH@std@@@std@@YAPEAU?$pair@NH@0@PEAU10@@Z + ??$_Unfancy@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@YAPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@PEAU10@@Z + ??$_Unfancy@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@PEAU120@@Z + ??$_Unfancy@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@PEAU120@@Z + ??$_Unfancy@U_Loop_vals_t@std@@@std@@YAPEAU_Loop_vals_t@0@PEAU10@@Z + ??$_Unfancy@Udollyv2_layer@@@std@@YAPEAUdollyv2_layer@@PEAU1@@Z + ??$_Unfancy@Ufalcon_layer@@@std@@YAPEAUfalcon_layer@@PEAU1@@Z + ??$_Unfancy@Ugpt2_layer@@@std@@YAPEAUgpt2_layer@@PEAU1@@Z + ??$_Unfancy@Ugpt_neox_layer@@@std@@YAPEAUgpt_neox_layer@@PEAU1@@Z + ??$_Unfancy@Ugptj_layer@@@std@@YAPEAUgptj_layer@@PEAU1@@Z + ??$_Unfancy@Ullama_buffer@@@std@@YAPEAUllama_buffer@@PEAU1@@Z + ??$_Unfancy@Ullama_layer@@@std@@YAPEAUllama_layer@@PEAU1@@Z + ??$_Unfancy@Ullama_load_tensor@@@std@@YAPEAUllama_load_tensor@@PEAU1@@Z + ??$_Unfancy@Ullama_load_tensor_shard@@@std@@YAPEAUllama_load_tensor_shard@@PEAU1@@Z + ??$_Unfancy@Ullama_sp_bigram@@@std@@YAPEAUllama_sp_bigram@@PEAU1@@Z + ??$_Unfancy@Ullama_sp_symbol@@@std@@YAPEAUllama_sp_symbol@@PEAU1@@Z + ??$_Unfancy@Ullama_token_data@@@std@@YAPEAUllama_token_data@@PEAU1@@Z + ??$_Unfancy@Umpt_layer@@@std@@YAPEAUmpt_layer@@PEAU1@@Z + ??$_Unfancy@Ureplit_layer@@@std@@YAPEAUreplit_layer@@PEAU1@@Z + ??$_Unfancy@Ustarcoder_layer@@@std@@YAPEAUstarcoder_layer@@PEAU1@@Z + ??$_Unfancy@Utoken_score@llama_vocab@@@std@@YAPEAUtoken_score@llama_vocab@@PEAU12@@Z + ??$_Unfancy@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@PEAV10@@Z + ??$_Unfancy@V?$sub_match@PEBD@std@@@std@@YAPEAV?$sub_match@PEBD@0@PEAV10@@Z + ??$_Unfancy@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YAPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@PEAV10@@Z + ??$_Unfancy@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@YAPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@0@PEAV10@@Z + ??$_Unfancy@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@YAPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@0@PEAV10@@Z + ??$_Unfancy@Vthread@std@@@std@@YAPEAVthread@0@PEAV10@@Z + ??$_Unfancy@_J@std@@YAPEA_JPEA_J@Z + ??$_Unfancy@_K@std@@YAPEA_KPEA_K@Z + ??$_Unfancy@_W@std@@YAPEA_WPEA_W@Z + ??$_Unfancy_maybe_null@D@std@@YAPEADPEAD@Z + ??$_Unfancy_maybe_null@E@std@@YAPEAEPEAE@Z + ??$_Unfancy_maybe_null@H@std@@YAPEAHPEAH@Z + ??$_Unfancy_maybe_null@I@std@@YAPEAIPEAI@Z + ??$_Unfancy_maybe_null@M@std@@YAPEAMPEAM@Z + ??$_Unfancy_maybe_null@Ullama_token_data@@@std@@YAPEAUllama_token_data@@PEAU1@@Z + ??$_Unfancy_maybe_null@_J@std@@YAPEA_JPEA_J@Z + ??$_Ungetc@D@std@@YA_NAEBDPEAU_iobuf@@@Z + ??$_Uninitialized_copy@PEADV?$allocator@D@std@@@std@@YAPEADQEAD0PEADAEAV?$allocator@D@0@@Z + ??$_Uninitialized_copy@PEAHV?$allocator@H@std@@@std@@YAPEAHQEAH0PEAHAEAV?$allocator@H@0@@Z + ??$_Uninitialized_copy@PEAIV?$allocator@I@std@@@std@@YAPEAIQEAI0PEAIAEAV?$allocator@I@0@@Z + ??$_Uninitialized_copy@PEAMV?$allocator@N@std@@@std@@YAPEANQEAM0PEANAEAV?$allocator@N@0@@Z + ??$_Uninitialized_copy@PEANV?$allocator@N@std@@@std@@YAPEANQEAN0PEANAEAV?$allocator@N@0@@Z + ??$_Uninitialized_copy@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@QEAU120@0PEAU120@AEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@0@@Z + ??$_Uninitialized_copy@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@QEAU120@0PEAU120@AEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@@Z + ??$_Uninitialized_copy@PEBDV?$allocator@D@std@@@std@@YAPEADQEBD0PEADAEAV?$allocator@D@0@@Z + ??$_Uninitialized_copy@PEBHV?$allocator@H@std@@@std@@YAPEAHQEBH0PEAHAEAV?$allocator@H@0@@Z + ??$_Uninitialized_copy@PEBIV?$allocator@I@std@@@std@@YAPEAIQEBI0PEAIAEAV?$allocator@I@0@@Z + ??$_Uninitialized_copy@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@V?$allocator@H@2@@std@@YAPEAHV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@0PEAHAEAV?$allocator@H@0@@Z + ??$_Uninitialized_fill_n@V?$allocator@H@std@@@std@@YAPEAHPEAH_KAEBHAEAV?$allocator@H@0@@Z + ??$_Uninitialized_fill_n@V?$allocator@I@std@@@std@@YAPEAIPEAI_KAEBIAEAV?$allocator@I@0@@Z + ??$_Uninitialized_fill_n@V?$allocator@M@std@@@std@@YAPEAMPEAM_KAEBMAEAV?$allocator@M@0@@Z + ??$_Uninitialized_fill_n@V?$allocator@N@std@@@std@@YAPEANPEAN_KAEBNAEAV?$allocator@N@0@@Z + ??$_Uninitialized_fill_n@V?$allocator@_J@std@@@std@@YAPEA_JPEA_J_KAEB_JAEAV?$allocator@_J@0@@Z + ??$_Uninitialized_move@PEADV?$allocator@D@std@@@std@@YAPEADQEAD0PEADAEAV?$allocator@D@0@@Z + ??$_Uninitialized_move@PEAHV?$allocator@H@std@@@std@@YAPEAHQEAH0PEAHAEAV?$allocator@H@0@@Z + ??$_Uninitialized_move@PEAIV?$allocator@I@std@@@std@@YAPEAIQEAI0PEAIAEAV?$allocator@I@0@@Z + ??$_Uninitialized_move@PEAMV?$allocator@M@std@@@std@@YAPEAMQEAM0PEAMAEAV?$allocator@M@0@@Z + ??$_Uninitialized_move@PEANV?$allocator@N@std@@@std@@YAPEANQEAN0PEANAEAV?$allocator@N@0@@Z + ??$_Uninitialized_move@PEAPEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@YAPEAPEAUggml_tensor@@QEAPEAU1@0PEAPEAU1@AEAV?$allocator@PEAUggml_tensor@@@0@@Z + ??$_Uninitialized_move@PEAU?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@YAPEAU?$pair@NH@0@QEAU10@0PEAU10@AEAV?$allocator@U?$pair@NH@std@@@0@@Z + ??$_Uninitialized_move@PEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@YAPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@QEAU10@0PEAU10@AEAV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@0@@Z + ??$_Uninitialized_move@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@QEAU120@0PEAU120@AEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@0@@Z + ??$_Uninitialized_move@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@QEAU120@0PEAU120@AEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@@Z + ??$_Uninitialized_move@PEAU_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@YAPEAU_Loop_vals_t@0@QEAU10@0PEAU10@AEAV?$allocator@U_Loop_vals_t@std@@@0@@Z + ??$_Uninitialized_move@PEAUdollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@YAPEAUdollyv2_layer@@QEAU1@0PEAU1@AEAV?$allocator@Udollyv2_layer@@@0@@Z + ??$_Uninitialized_move@PEAUfalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@YAPEAUfalcon_layer@@QEAU1@0PEAU1@AEAV?$allocator@Ufalcon_layer@@@0@@Z + ??$_Uninitialized_move@PEAUgpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@YAPEAUgpt2_layer@@QEAU1@0PEAU1@AEAV?$allocator@Ugpt2_layer@@@0@@Z + ??$_Uninitialized_move@PEAUgpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@YAPEAUgpt_neox_layer@@QEAU1@0PEAU1@AEAV?$allocator@Ugpt_neox_layer@@@0@@Z + ??$_Uninitialized_move@PEAUgptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@YAPEAUgptj_layer@@QEAU1@0PEAU1@AEAV?$allocator@Ugptj_layer@@@0@@Z + ??$_Uninitialized_move@PEAUllama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@YAPEAUllama_layer@@QEAU1@0PEAU1@AEAV?$allocator@Ullama_layer@@@0@@Z + ??$_Uninitialized_move@PEAUllama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@YAPEAUllama_load_tensor@@QEAU1@0PEAU1@AEAV?$allocator@Ullama_load_tensor@@@0@@Z + ??$_Uninitialized_move@PEAUllama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@YAPEAUllama_load_tensor_shard@@QEAU1@0PEAU1@AEAV?$allocator@Ullama_load_tensor_shard@@@0@@Z + ??$_Uninitialized_move@PEAUllama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@YAPEAUllama_sp_bigram@@QEAU1@0PEAU1@AEAV?$allocator@Ullama_sp_bigram@@@0@@Z + ??$_Uninitialized_move@PEAUllama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@YAPEAUllama_sp_symbol@@QEAU1@0PEAU1@AEAV?$allocator@Ullama_sp_symbol@@@0@@Z + ??$_Uninitialized_move@PEAUllama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@YAPEAUllama_token_data@@QEAU1@0PEAU1@AEAV?$allocator@Ullama_token_data@@@0@@Z + ??$_Uninitialized_move@PEAUmpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@YAPEAUmpt_layer@@QEAU1@0PEAU1@AEAV?$allocator@Umpt_layer@@@0@@Z + ??$_Uninitialized_move@PEAUreplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@YAPEAUreplit_layer@@QEAU1@0PEAU1@AEAV?$allocator@Ureplit_layer@@@0@@Z + ??$_Uninitialized_move@PEAUstarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@YAPEAUstarcoder_layer@@QEAU1@0PEAU1@AEAV?$allocator@Ustarcoder_layer@@@0@@Z + ??$_Uninitialized_move@PEAUtoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@YAPEAUtoken_score@llama_vocab@@QEAU12@0PEAU12@AEAV?$allocator@Utoken_score@llama_vocab@@@0@@Z + ??$_Uninitialized_move@PEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@YAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@QEAV10@0PEAV10@AEAV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@@Z + ??$_Uninitialized_move@PEAV?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@YAPEAV?$sub_match@PEBD@0@QEAV10@0PEAV10@AEAV?$allocator@V?$sub_match@PEBD@std@@@0@@Z + ??$_Uninitialized_move@PEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@YAPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@QEAV10@0PEAV10@AEAV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@@Z + ??$_Uninitialized_move@PEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@YAPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@0@QEAV10@0PEAV10@AEAV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@0@@Z + ??$_Uninitialized_move@PEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@YAPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@0@QEAV10@0PEAV10@AEAV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@0@@Z + ??$_Uninitialized_move@PEAVthread@std@@V?$allocator@Vthread@std@@@2@@std@@YAPEAVthread@0@QEAV10@0PEAV10@AEAV?$allocator@Vthread@std@@@0@@Z + ??$_Uninitialized_move@PEA_JV?$allocator@_J@std@@@std@@YAPEA_JQEA_J0PEA_JAEAV?$allocator@_J@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@D@std@@@std@@YAPEADPEAD_KAEAV?$allocator@D@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@E@std@@@std@@YAPEAEPEAE_KAEAV?$allocator@E@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@I@std@@@std@@YAPEAIPEAI_KAEAV?$allocator@I@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@M@std@@@std@@YAPEAMPEAM_KAEAV?$allocator@M@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@N@std@@@std@@YAPEANPEAN_KAEAV?$allocator@N@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@U?$pair@NH@std@@@std@@@std@@YAPEAU?$pair@NH@0@PEAU10@_KAEAV?$allocator@U?$pair@NH@std@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@PEAU120@_KAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@PEAU120@_KAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@YAPEAU_Loop_vals_t@0@PEAU10@_KAEAV?$allocator@U_Loop_vals_t@std@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Udollyv2_layer@@@std@@@std@@YAPEAUdollyv2_layer@@PEAU1@_KAEAV?$allocator@Udollyv2_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Ufalcon_layer@@@std@@@std@@YAPEAUfalcon_layer@@PEAU1@_KAEAV?$allocator@Ufalcon_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Ugpt2_layer@@@std@@@std@@YAPEAUgpt2_layer@@PEAU1@_KAEAV?$allocator@Ugpt2_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Ugpt_neox_layer@@@std@@@std@@YAPEAUgpt_neox_layer@@PEAU1@_KAEAV?$allocator@Ugpt_neox_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Ugptj_layer@@@std@@@std@@YAPEAUgptj_layer@@PEAU1@_KAEAV?$allocator@Ugptj_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Ullama_buffer@@@std@@@std@@YAPEAUllama_buffer@@PEAU1@_KAEAV?$allocator@Ullama_buffer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Ullama_layer@@@std@@@std@@YAPEAUllama_layer@@PEAU1@_KAEAV?$allocator@Ullama_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Umpt_layer@@@std@@@std@@YAPEAUmpt_layer@@PEAU1@_KAEAV?$allocator@Umpt_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Ureplit_layer@@@std@@@std@@YAPEAUreplit_layer@@PEAU1@_KAEAV?$allocator@Ureplit_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Ustarcoder_layer@@@std@@@std@@YAPEAUstarcoder_layer@@PEAU1@_KAEAV?$allocator@Ustarcoder_layer@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@YAPEAUtoken_score@llama_vocab@@PEAU12@_KAEAV?$allocator@Utoken_score@llama_vocab@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@YAPEAV?$sub_match@PEBD@0@PEAV10@_KAEAV?$allocator@V?$sub_match@PEBD@std@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@YAPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@PEAV10@_KAEAV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@Vthread@std@@@std@@@std@@YAPEAVthread@0@PEAV10@_KAEAV?$allocator@Vthread@std@@@0@@Z + ??$_Uninitialized_value_construct_n@V?$allocator@_K@std@@@std@@YAPEA_KPEA_K_KAEAV?$allocator@_K@0@@Z + ??$_Voidify_iter@PEAD@std@@YAPEAXPEAD@Z + ??$_Voidify_iter@PEAE@std@@YAPEAXPEAE@Z + ??$_Voidify_iter@PEAH@std@@YAPEAXPEAH@Z + ??$_Voidify_iter@PEAI@std@@YAPEAXPEAI@Z + ??$_Voidify_iter@PEAM@std@@YAPEAXPEAM@Z + ??$_Voidify_iter@PEAN@std@@YAPEAXPEAN@Z + ??$_Voidify_iter@PEAPEAD@std@@YAPEAXPEAPEAD@Z + ??$_Voidify_iter@PEAPEAU?$_List_node@HPEAX@std@@@std@@YAPEAXPEAPEAU?$_List_node@HPEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@YAPEAXPEAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@@Z + ??$_Voidify_iter@PEAPEAUggml_tensor@@@std@@YAPEAXPEAPEAUggml_tensor@@@Z + ??$_Voidify_iter@PEAPEA_W@std@@YAPEAXPEAPEA_W@Z + ??$_Voidify_iter@PEAU?$pair@$$CBHH@std@@@std@@YAPEAXPEAU?$pair@$$CBHH@0@@Z + ??$_Voidify_iter@PEAU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YAPEAXPEAU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@@Z + ??$_Voidify_iter@PEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@YAPEAXPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@0@@Z + ??$_Voidify_iter@PEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@YAPEAXPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@@Z + ??$_Voidify_iter@PEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@YAPEAXPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@0@@Z + ??$_Voidify_iter@PEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@YAPEAXPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@0@@Z + ??$_Voidify_iter@PEAU?$pair@$$CBW4e_model@@_K@std@@@std@@YAPEAXPEAU?$pair@$$CBW4e_model@@_K@0@@Z + ??$_Voidify_iter@PEAU?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@YAPEAXPEAU?$pair@$$CBW4falcon_e_model@@_K@0@@Z + ??$_Voidify_iter@PEAU?$pair@NH@std@@@std@@YAPEAXPEAU?$pair@NH@0@@Z + ??$_Voidify_iter@PEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@YAPEAXPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@@Z + ??$_Voidify_iter@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YAPEAXPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@@Z + ??$_Voidify_iter@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YAPEAXPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@@Z + ??$_Voidify_iter@PEAU_Loop_vals_t@std@@@std@@YAPEAXPEAU_Loop_vals_t@0@@Z + ??$_Voidify_iter@PEAUdollyv2_layer@@@std@@YAPEAXPEAUdollyv2_layer@@@Z + ??$_Voidify_iter@PEAUfalcon_layer@@@std@@YAPEAXPEAUfalcon_layer@@@Z + ??$_Voidify_iter@PEAUgpt2_layer@@@std@@YAPEAXPEAUgpt2_layer@@@Z + ??$_Voidify_iter@PEAUgpt_neox_layer@@@std@@YAPEAXPEAUgpt_neox_layer@@@Z + ??$_Voidify_iter@PEAUgptj_layer@@@std@@YAPEAXPEAUgptj_layer@@@Z + ??$_Voidify_iter@PEAUllama_buffer@@@std@@YAPEAXPEAUllama_buffer@@@Z + ??$_Voidify_iter@PEAUllama_layer@@@std@@YAPEAXPEAUllama_layer@@@Z + ??$_Voidify_iter@PEAUllama_load_tensor@@@std@@YAPEAXPEAUllama_load_tensor@@@Z + ??$_Voidify_iter@PEAUllama_load_tensor_shard@@@std@@YAPEAXPEAUllama_load_tensor_shard@@@Z + ??$_Voidify_iter@PEAUllama_sp_bigram@@@std@@YAPEAXPEAUllama_sp_bigram@@@Z + ??$_Voidify_iter@PEAUllama_sp_symbol@@@std@@YAPEAXPEAUllama_sp_symbol@@@Z + ??$_Voidify_iter@PEAUllama_token_data@@@std@@YAPEAXPEAUllama_token_data@@@Z + ??$_Voidify_iter@PEAUmpt_layer@@@std@@YAPEAXPEAUmpt_layer@@@Z + ??$_Voidify_iter@PEAUreplit_layer@@@std@@YAPEAXPEAUreplit_layer@@@Z + ??$_Voidify_iter@PEAUstarcoder_layer@@@std@@YAPEAXPEAUstarcoder_layer@@@Z + ??$_Voidify_iter@PEAUtoken_score@llama_vocab@@@std@@YAPEAXPEAUtoken_score@llama_vocab@@@Z + ??$_Voidify_iter@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YAPEAXPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@@Z + ??$_Voidify_iter@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@YAPEAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@@Z + ??$_Voidify_iter@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@YAPEAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@@Z + ??$_Voidify_iter@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@YAPEAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@@Z + ??$_Voidify_iter@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@YAPEAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@@Z + ??$_Voidify_iter@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@YAPEAXPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@@Z + ??$_Voidify_iter@PEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YAPEAXPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@@Z + ??$_Voidify_iter@PEAV?$sub_match@PEBD@std@@@std@@YAPEAXPEAV?$sub_match@PEBD@0@@Z + ??$_Voidify_iter@PEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YAPEAXPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@@Z + ??$_Voidify_iter@PEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@YAPEAXPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@0@@Z + ??$_Voidify_iter@PEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@YAPEAXPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@0@@Z + ??$_Voidify_iter@PEAVthread@std@@@std@@YAPEAXPEAVthread@0@@Z + ??$_Voidify_iter@PEA_J@std@@YAPEAXPEA_J@Z + ??$_Voidify_iter@PEA_K@std@@YAPEAXPEA_K@Z + ??$_Within_limits@E@std@@YA_NAEBEU?$integral_constant@_N$00@0@U?$integral_constant@_N$0A@@0@22@Z + ??$_Within_limits@PEBDE@std@@YA_NAEBQEBDAEBE@Z + ??$_Write@DU?$char_traits@D@std@@@?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@QEBAAEAV?$basic_ostream@DU?$char_traits@D@std@@@1@AEAV21@@Z + ??$_Zero_range@PEAD@std@@YAPEADQEAD0@Z + ??$_Zero_range@PEAE@std@@YAPEAEQEAE0@Z + ??$_Zero_range@PEAI@std@@YAPEAIQEAI0@Z + ??$_Zero_range@PEAM@std@@YAPEAMQEAM0@Z + ??$_Zero_range@PEAN@std@@YAPEANQEAN0@Z + ??$_Zero_range@PEA_K@std@@YAPEA_KQEA_K0@Z + ??$accumulate@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@M@std@@YAMV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@0@0M@Z + ??$accumulate@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@MU?$plus@X@2@@std@@YAMV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@0@0MU?$plus@X@0@@Z + ??$addressof@$$CBU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YAPEBU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEBU10@@Z + ??$addressof@$$CBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@YAPEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@0@AEBU10@@Z + ??$addressof@$$CBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@YAPEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@0@AEBU10@@Z + ??$addressof@$$CBV?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@YAPEBV?$_List_val@U?$_List_simple_types@H@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@YAPEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@YAPEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@YAPEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@YAPEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@YAPEBV?$_String_val@U?$_Simple_types@D@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_String_val@U?$_Simple_types@_W@std@@@std@@@std@@YAPEBV?$_String_val@U?$_Simple_types@_W@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@@std@@YAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@YAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@YAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@@std@@YAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@@std@@YAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@YAPEBV?$_Vector_val@U?$_Simple_types@H@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@YAPEBV?$_Vector_val@U?$_Simple_types@N@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YAPEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEBV10@@Z + ??$addressof@$$CBV?$codecvt@DDU_Mbstatet@@@std@@@std@@YAPEBV?$codecvt@DDU_Mbstatet@@@0@AEBV10@@Z + ??$addressof@$$CBV?$collate@D@std@@@std@@YAPEBV?$collate@D@0@AEBV10@@Z + ??$addressof@$$CBV?$ctype@D@std@@@std@@YAPEBV?$ctype@D@0@AEBV10@@Z + ??$addressof@$$CBV?$vector@IV?$allocator@I@std@@@std@@@std@@YAPEBV?$vector@IV?$allocator@I@std@@@0@AEBV10@@Z + ??$addressof@$$CBV?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@@std@@YAPEBV?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@0@AEBV10@@Z + ??$addressof@$$CBV?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@@std@@YAPEBV?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@0@AEBV10@@Z + ??$addressof@$$CBV?$vector@_NV?$allocator@_N@std@@@std@@@std@@YAPEBV?$vector@_NV?$allocator@_N@std@@@0@AEBV10@@Z + ??$addressof@D@std@@YAPEADAEAD@Z + ??$addressof@H@std@@YAPEAHAEAH@Z + ??$addressof@PEAD@std@@YAPEAPEADAEAPEAD@Z + ??$addressof@PEAU?$_List_node@HPEAX@std@@@std@@YAPEAPEAU?$_List_node@HPEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@YAPEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAPEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAPEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@YAPEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@YAPEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_Sequence@D@std@@@std@@YAPEAPEAU?$_Sequence@D@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@YAPEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAPEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAPEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@YAPEAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@YAPEAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@AEAPEAU10@@Z + ??$addressof@PEA_W@std@@YAPEAPEA_WAEAPEA_W@Z + ??$addressof@PEBD@std@@YAPEAPEBDAEAPEBD@Z + ??$addressof@U?$pair@$$CBHH@std@@@std@@YAPEAU?$pair@$$CBHH@0@AEAU10@@Z + ??$addressof@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YAPEAU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEAU10@@Z + ??$addressof@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@YAPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@0@AEAU10@@Z + ??$addressof@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@YAPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@AEAU10@@Z + ??$addressof@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@YAPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@0@AEAU10@@Z + ??$addressof@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@YAPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@0@AEAU10@@Z + ??$addressof@U?$pair@$$CBW4e_model@@_K@std@@@std@@YAPEAU?$pair@$$CBW4e_model@@_K@0@AEAU10@@Z + ??$addressof@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@YAPEAU?$pair@$$CBW4falcon_e_model@@_K@0@AEAU10@@Z + ??$addressof@V?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@@std@@YAPEAV?$_Builder@PEBDDV?$regex_traits@D@std@@@0@AEAV10@@Z + ??$addressof@V?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@@std@@YAPEAV?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@@std@@YAPEAV?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YAPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@AEAV10@@Z + ??$addressof@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@YAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@YAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@YAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@YAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@YAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@YAPEAV?$_List_val@U?$_List_simple_types@H@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@YAPEAV?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@YAPEAV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@YAPEAV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@YAPEAV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@YAPEAV?$_String_val@U?$_Simple_types@D@std@@@0@AEAV10@@Z + ??$addressof@V?$_String_val@U?$_Simple_types@_W@std@@@std@@@std@@YAPEAV?$_String_val@U?$_Simple_types@_W@std@@@0@AEAV10@@Z + ??$addressof@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@@std@@YAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@YAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@YAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@@std@@YAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@@std@@YAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@YAPEAV?$_Vector_val@U?$_Simple_types@H@std@@@0@AEAV10@@Z + ??$addressof@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@std@@YAPEAV?$_Vector_val@U?$_Simple_types@I@std@@@0@AEAV10@@Z + ??$addressof@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@YAPEAV?$_Vector_val@U?$_Simple_types@M@std@@@0@AEAV10@@Z + ??$addressof@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@YAPEAV?$_Vector_val@U?$_Simple_types@N@std@@@0@AEAV10@@Z + ??$addressof@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@YAPEAV?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@std@@YAPEAV?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@std@@YAPEAV?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@0@AEAV10@@Z + ??$addressof@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@std@@YAPEAV?$_Vector_val@U?$_Simple_types@_K@std@@@0@AEAV10@@Z + ??$addressof@V?$basic_filebuf@DU?$char_traits@D@std@@@std@@@std@@YAPEAV?$basic_filebuf@DU?$char_traits@D@std@@@0@AEAV10@@Z + ??$addressof@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEAV10@@Z + ??$addressof@V?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YAPEAV?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEAV10@@Z + ??$addressof@V?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@@std@@YAPEAV?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@0@AEAV10@@Z + ??$addressof@V?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@@std@@YAPEAV?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@0@AEAV10@@Z + ??$addressof@V?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@std@@@std@@YAPEAV?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@0@AEAV10@@Z + ??$addressof@V?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@@std@@YAPEAV?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@0@AEAV10@@Z + ??$addressof@Vmutex@std@@@std@@YAPEAVmutex@0@AEAV10@@Z + ??$advance@PEAI_J@std@@YAXAEAPEAI_J@Z + ??$advance@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@_J@std@@YAXAEAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@_J@Z + ??$advance@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@_J@std@@YAXAEAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@_J@Z + ??$advance@PEBI_J@std@@YAXAEAPEBI_J@Z + ??$advance@PEBN_J@std@@YAXAEAPEBN_J@Z + ??$assign@PEAI$0A@@?$vector@IV?$allocator@I@std@@@std@@QEAAXPEAI0@Z + ??$assign@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@$0A@@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEAAXPEAU_Grp_t@?$_Tgt_state_t@PEBD@1@0@Z + ??$assign@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@$0A@@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEAAXPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@0@Z + ??$back_inserter@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YA?AV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@@Z + ??$begin@PEBD@std@@YAPEBQEBDV?$initializer_list@PEBD@0@@Z + ??$construct@D$$V@?$_Default_allocator_traits@V?$allocator@D@std@@@std@@SAXAEAV?$allocator@D@1@QEAD@Z + ??$construct@DAEAD@?$_Default_allocator_traits@V?$allocator@D@std@@@std@@SAXAEAV?$allocator@D@1@QEADAEAD@Z + ??$construct@DAEBD@?$_Default_allocator_traits@V?$allocator@D@std@@@std@@SAXAEAV?$allocator@D@1@QEADAEBD@Z + ??$construct@DD@?$_Default_allocator_traits@V?$allocator@D@std@@@std@@SAXAEAV?$allocator@D@1@QEAD$$QEAD@Z + ??$construct@E$$V@?$_Default_allocator_traits@V?$allocator@E@std@@@std@@SAXAEAV?$allocator@E@1@QEAE@Z + ??$construct@HAEAH@?$_Default_allocator_traits@V?$allocator@H@std@@@std@@SAXAEAV?$allocator@H@1@QEAHAEAH@Z + ??$construct@HAEBH@?$_Default_allocator_traits@V?$allocator@H@std@@@std@@SAXAEAV?$allocator@H@1@QEAHAEBH@Z + ??$construct@HAEBH@?$_Default_allocator_traits@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@HPEAX@std@@@1@QEAHAEBH@Z + ??$construct@HH@?$_Default_allocator_traits@V?$allocator@H@std@@@std@@SAXAEAV?$allocator@H@1@QEAH$$QEAH@Z + ??$construct@I$$V@?$_Default_allocator_traits@V?$allocator@I@std@@@std@@SAXAEAV?$allocator@I@1@QEAI@Z + ??$construct@IAEAI@?$_Default_allocator_traits@V?$allocator@I@std@@@std@@SAXAEAV?$allocator@I@1@QEAIAEAI@Z + ??$construct@IAEBI@?$_Default_allocator_traits@V?$allocator@I@std@@@std@@SAXAEAV?$allocator@I@1@QEAIAEBI@Z + ??$construct@II@?$_Default_allocator_traits@V?$allocator@I@std@@@std@@SAXAEAV?$allocator@I@1@QEAI$$QEAI@Z + ??$construct@M$$V@?$_Default_allocator_traits@V?$allocator@M@std@@@std@@SAXAEAV?$allocator@M@1@QEAM@Z + ??$construct@MAEBM@?$_Default_allocator_traits@V?$allocator@M@std@@@std@@SAXAEAV?$allocator@M@1@QEAMAEBM@Z + ??$construct@MM@?$_Default_allocator_traits@V?$allocator@M@std@@@std@@SAXAEAV?$allocator@M@1@QEAM$$QEAM@Z + ??$construct@N$$V@?$_Default_allocator_traits@V?$allocator@N@std@@@std@@SAXAEAV?$allocator@N@1@QEAN@Z + ??$construct@NAEAM@?$_Default_allocator_traits@V?$allocator@N@std@@@std@@SAXAEAV?$allocator@N@1@QEANAEAM@Z + ??$construct@NAEAN@?$_Default_allocator_traits@V?$allocator@N@std@@@std@@SAXAEAV?$allocator@N@1@QEANAEAN@Z + ??$construct@NAEBN@?$_Default_allocator_traits@V?$allocator@N@std@@@std@@SAXAEAV?$allocator@N@1@QEANAEBN@Z + ??$construct@NN@?$_Default_allocator_traits@V?$allocator@N@std@@@std@@SAXAEAV?$allocator@N@1@QEAN$$QEAN@Z + ??$construct@PEAUggml_tensor@@AEBQEAU1@@?$_Default_allocator_traits@V?$allocator@PEAUggml_tensor@@@std@@@std@@SAXAEAV?$allocator@PEAUggml_tensor@@@1@QEAPEAUggml_tensor@@AEBQEAU3@@Z + ??$construct@PEAUggml_tensor@@PEAU1@@?$_Default_allocator_traits@V?$allocator@PEAUggml_tensor@@@std@@@std@@SAXAEAV?$allocator@PEAUggml_tensor@@@1@QEAPEAUggml_tensor@@$$QEAPEAU3@@Z + ??$construct@U?$pair@$$CBHH@std@@AEBUpiecewise_construct_t@2@V?$tuple@AEBH@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@1@QEAU?$pair@$$CBHH@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBH@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@AEBUpiecewise_construct_t@2@V?$tuple@$$QEAH@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@$$QEAH@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@AEBUpiecewise_construct_t@2@V?$tuple@AEBH@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBH@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@AEBUpiecewise_construct_t@2@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@AEBUpiecewise_construct_t@2@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@AEBUpiecewise_construct_t@2@V?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@AEBUpiecewise_construct_t@2@V?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@AEBUpiecewise_construct_t@2@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@AEBUpiecewise_construct_t@2@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@AEA_K@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEA_K@Z + ??$construct@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@AEBUpiecewise_construct_t@2@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$tuple@$$V@2@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@AEBUpiecewise_construct_t@1@$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@$$QEAV?$tuple@$$V@1@@Z + ??$construct@U?$pair@$$CBW4e_model@@_K@std@@AEBU12@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@QEAU?$pair@$$CBW4e_model@@_K@1@AEBU31@@Z + ??$construct@U?$pair@$$CBW4falcon_e_model@@_K@std@@AEBU12@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@QEAU?$pair@$$CBW4falcon_e_model@@_K@1@AEBU31@@Z + ??$construct@U?$pair@NH@std@@$$V@?$_Default_allocator_traits@V?$allocator@U?$pair@NH@std@@@std@@@std@@SAXAEAV?$allocator@U?$pair@NH@std@@@1@QEAU?$pair@NH@1@@Z + ??$construct@U?$pair@NH@std@@U12@@?$_Default_allocator_traits@V?$allocator@U?$pair@NH@std@@@std@@@std@@SAXAEAV?$allocator@U?$pair@NH@std@@@1@QEAU?$pair@NH@1@$$QEAU31@@Z + ??$construct@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@AEAPEAUggml_tensor@@@?$_Default_allocator_traits@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@SAXAEAV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@QEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEAPEAUggml_tensor@@@Z + ??$construct@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@U12@@?$_Default_allocator_traits@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@SAXAEAV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@QEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@$$QEAU31@@Z + ??$construct@U_Grp_t@?$_Tgt_state_t@PEBD@std@@$$V@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@SAXAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@1@QEAU_Grp_t@?$_Tgt_state_t@PEBD@1@@Z + ??$construct@U_Grp_t@?$_Tgt_state_t@PEBD@std@@AEAU123@@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@SAXAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@1@QEAU_Grp_t@?$_Tgt_state_t@PEBD@1@AEAU341@@Z + ??$construct@U_Grp_t@?$_Tgt_state_t@PEBD@std@@U123@@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@SAXAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@1@QEAU_Grp_t@?$_Tgt_state_t@PEBD@1@$$QEAU341@@Z + ??$construct@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@$$V@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@1@QEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@@Z + ??$construct@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@AEAU123@@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@1@QEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@AEAU341@@Z + ??$construct@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@U123@@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@1@QEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@$$QEAU341@@Z + ??$construct@U_Loop_vals_t@std@@$$V@?$_Default_allocator_traits@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@SAXAEAV?$allocator@U_Loop_vals_t@std@@@1@QEAU_Loop_vals_t@1@@Z + ??$construct@U_Loop_vals_t@std@@U12@@?$_Default_allocator_traits@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@SAXAEAV?$allocator@U_Loop_vals_t@std@@@1@QEAU_Loop_vals_t@1@$$QEAU31@@Z + ??$construct@Udollyv2_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Udollyv2_layer@@@std@@@std@@SAXAEAV?$allocator@Udollyv2_layer@@@1@QEAUdollyv2_layer@@@Z + ??$construct@Udollyv2_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Udollyv2_layer@@@std@@@std@@SAXAEAV?$allocator@Udollyv2_layer@@@1@QEAUdollyv2_layer@@$$QEAU3@@Z + ??$construct@Ufalcon_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Ufalcon_layer@@@std@@@std@@SAXAEAV?$allocator@Ufalcon_layer@@@1@QEAUfalcon_layer@@@Z + ??$construct@Ufalcon_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Ufalcon_layer@@@std@@@std@@SAXAEAV?$allocator@Ufalcon_layer@@@1@QEAUfalcon_layer@@$$QEAU3@@Z + ??$construct@Ugpt2_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Ugpt2_layer@@@std@@@std@@SAXAEAV?$allocator@Ugpt2_layer@@@1@QEAUgpt2_layer@@@Z + ??$construct@Ugpt2_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Ugpt2_layer@@@std@@@std@@SAXAEAV?$allocator@Ugpt2_layer@@@1@QEAUgpt2_layer@@$$QEAU3@@Z + ??$construct@Ugpt_neox_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Ugpt_neox_layer@@@std@@@std@@SAXAEAV?$allocator@Ugpt_neox_layer@@@1@QEAUgpt_neox_layer@@@Z + ??$construct@Ugpt_neox_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Ugpt_neox_layer@@@std@@@std@@SAXAEAV?$allocator@Ugpt_neox_layer@@@1@QEAUgpt_neox_layer@@$$QEAU3@@Z + ??$construct@Ugptj_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Ugptj_layer@@@std@@@std@@SAXAEAV?$allocator@Ugptj_layer@@@1@QEAUgptj_layer@@@Z + ??$construct@Ugptj_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Ugptj_layer@@@std@@@std@@SAXAEAV?$allocator@Ugptj_layer@@@1@QEAUgptj_layer@@$$QEAU3@@Z + ??$construct@Ullama_buffer@@$$V@?$_Default_allocator_traits@V?$allocator@Ullama_buffer@@@std@@@std@@SAXAEAV?$allocator@Ullama_buffer@@@1@QEAUllama_buffer@@@Z + ??$construct@Ullama_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Ullama_layer@@@std@@@std@@SAXAEAV?$allocator@Ullama_layer@@@1@QEAUllama_layer@@@Z + ??$construct@Ullama_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Ullama_layer@@@std@@@std@@SAXAEAV?$allocator@Ullama_layer@@@1@QEAUllama_layer@@$$QEAU3@@Z + ??$construct@Ullama_load_tensor@@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Default_allocator_traits@V?$allocator@Ullama_load_tensor@@@std@@@std@@SAXAEAV?$allocator@Ullama_load_tensor@@@1@QEAUllama_load_tensor@@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$construct@Ullama_load_tensor@@U1@@?$_Default_allocator_traits@V?$allocator@Ullama_load_tensor@@@std@@@std@@SAXAEAV?$allocator@Ullama_load_tensor@@@1@QEAUllama_load_tensor@@$$QEAU3@@Z + ??$construct@Ullama_load_tensor_shard@@AEBU1@@?$_Default_allocator_traits@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@SAXAEAV?$allocator@Ullama_load_tensor_shard@@@1@QEAUllama_load_tensor_shard@@AEBU3@@Z + ??$construct@Ullama_load_tensor_shard@@U1@@?$_Default_allocator_traits@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@SAXAEAV?$allocator@Ullama_load_tensor_shard@@@1@QEAUllama_load_tensor_shard@@$$QEAU3@@Z + ??$construct@Ullama_sp_bigram@@AEBU1@@?$_Default_allocator_traits@V?$allocator@Ullama_sp_bigram@@@std@@@std@@SAXAEAV?$allocator@Ullama_sp_bigram@@@1@QEAUllama_sp_bigram@@AEBU3@@Z + ??$construct@Ullama_sp_bigram@@U1@@?$_Default_allocator_traits@V?$allocator@Ullama_sp_bigram@@@std@@@std@@SAXAEAV?$allocator@Ullama_sp_bigram@@@1@QEAUllama_sp_bigram@@$$QEAU3@@Z + ??$construct@Ullama_sp_symbol@@AEAU1@@?$_Default_allocator_traits@V?$allocator@Ullama_sp_symbol@@@std@@@std@@SAXAEAV?$allocator@Ullama_sp_symbol@@@1@QEAUllama_sp_symbol@@AEAU3@@Z + ??$construct@Ullama_sp_symbol@@U1@@?$_Default_allocator_traits@V?$allocator@Ullama_sp_symbol@@@std@@@std@@SAXAEAV?$allocator@Ullama_sp_symbol@@@1@QEAUllama_sp_symbol@@$$QEAU3@@Z + ??$construct@Ullama_token_data@@AEBU1@@?$_Default_allocator_traits@V?$allocator@Ullama_token_data@@@std@@@std@@SAXAEAV?$allocator@Ullama_token_data@@@1@QEAUllama_token_data@@AEBU3@@Z + ??$construct@Ullama_token_data@@U1@@?$_Default_allocator_traits@V?$allocator@Ullama_token_data@@@std@@@std@@SAXAEAV?$allocator@Ullama_token_data@@@1@QEAUllama_token_data@@$$QEAU3@@Z + ??$construct@Umpt_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Umpt_layer@@@std@@@std@@SAXAEAV?$allocator@Umpt_layer@@@1@QEAUmpt_layer@@@Z + ??$construct@Umpt_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Umpt_layer@@@std@@@std@@SAXAEAV?$allocator@Umpt_layer@@@1@QEAUmpt_layer@@$$QEAU3@@Z + ??$construct@Ureplit_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Ureplit_layer@@@std@@@std@@SAXAEAV?$allocator@Ureplit_layer@@@1@QEAUreplit_layer@@@Z + ??$construct@Ureplit_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Ureplit_layer@@@std@@@std@@SAXAEAV?$allocator@Ureplit_layer@@@1@QEAUreplit_layer@@$$QEAU3@@Z + ??$construct@Ustarcoder_layer@@$$V@?$_Default_allocator_traits@V?$allocator@Ustarcoder_layer@@@std@@@std@@SAXAEAV?$allocator@Ustarcoder_layer@@@1@QEAUstarcoder_layer@@@Z + ??$construct@Ustarcoder_layer@@U1@@?$_Default_allocator_traits@V?$allocator@Ustarcoder_layer@@@std@@@std@@SAXAEAV?$allocator@Ustarcoder_layer@@@1@QEAUstarcoder_layer@@$$QEAU3@@Z + ??$construct@Utoken_score@llama_vocab@@$$V@?$_Default_allocator_traits@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@SAXAEAV?$allocator@Utoken_score@llama_vocab@@@1@QEAUtoken_score@llama_vocab@@@Z + ??$construct@Utoken_score@llama_vocab@@U12@@?$_Default_allocator_traits@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@SAXAEAV?$allocator@Utoken_score@llama_vocab@@@1@QEAUtoken_score@llama_vocab@@$$QEAU34@@Z + ??$construct@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBV12@@?$_Default_allocator_traits@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@SAXAEAV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEBV31@@Z + ??$construct@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V12@@?$_Default_allocator_traits@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@SAXAEAV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@$$QEAV31@@Z + ??$construct@V?$sub_match@PEBD@std@@$$V@?$_Default_allocator_traits@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@SAXAEAV?$allocator@V?$sub_match@PEBD@std@@@1@QEAV?$sub_match@PEBD@1@@Z + ??$construct@V?$sub_match@PEBD@std@@V12@@?$_Default_allocator_traits@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@SAXAEAV?$allocator@V?$sub_match@PEBD@std@@@1@QEAV?$sub_match@PEBD@1@$$QEAV31@@Z + ??$construct@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@$$V@?$_Default_allocator_traits@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@1@QEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@@Z + ??$construct@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V12@@?$_Default_allocator_traits@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@1@QEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@$$QEAV31@@Z + ??$construct@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@AEAPEAUfalcon_file_loader@@@?$_Default_allocator_traits@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@1@QEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@1@AEAPEAUfalcon_file_loader@@@Z + ??$construct@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V12@@?$_Default_allocator_traits@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@1@QEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@1@$$QEAV31@@Z + ??$construct@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@AEAPEAUllama_file_loader@@@?$_Default_allocator_traits@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@1@QEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@1@AEAPEAUllama_file_loader@@@Z + ??$construct@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V12@@?$_Default_allocator_traits@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@1@QEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@1@$$QEAV31@@Z + ??$construct@Vthread@std@@$$V@?$_Default_allocator_traits@V?$allocator@Vthread@std@@@std@@@std@@SAXAEAV?$allocator@Vthread@std@@@1@QEAVthread@1@@Z + ??$construct@Vthread@std@@V12@@?$_Default_allocator_traits@V?$allocator@Vthread@std@@@std@@@std@@SAXAEAV?$allocator@Vthread@std@@@1@QEAVthread@1@$$QEAV31@@Z + ??$construct@_JAEB_J@?$_Default_allocator_traits@V?$allocator@_J@std@@@std@@SAXAEAV?$allocator@_J@1@QEA_JAEB_J@Z + ??$construct@_J_J@?$_Default_allocator_traits@V?$allocator@_J@std@@@std@@SAXAEAV?$allocator@_J@1@QEA_J$$QEA_J@Z + ??$construct@_K$$V@?$_Default_allocator_traits@V?$allocator@_K@std@@@std@@SAXAEAV?$allocator@_K@1@QEA_K@Z + ??$copy@PEBDV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YA?AV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@PEBD0V10@@Z + ??$copy@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@V12@@std@@YA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@V10@00@Z + ??$copy@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@PEAH@std@@YAPEAHV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@0@0PEAH@Z + ??$copy@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@std@@PEAUllama_token_data@@@std@@YAPEAUllama_token_data@@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@0@0PEAU1@@Z + ??$copy_backward@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@V12@@std@@YA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@V10@00@Z + ??$count@X@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEBA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$destroy@H@?$_Default_allocator_traits@V?$allocator@H@std@@@std@@SAXAEAV?$allocator@H@1@QEAH@Z + ??$destroy@H@?$_Default_allocator_traits@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@HPEAX@std@@@1@QEAH@Z + ??$destroy@U?$pair@$$CBHH@std@@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@1@QEAU?$pair@$$CBHH@1@@Z + ??$destroy@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@@Z + ??$destroy@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@@Z + ??$destroy@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@@Z + ??$destroy@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$destroy@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$destroy@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@1@@Z + ??$destroy@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@QEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@@Z + ??$destroy@U?$pair@$$CBW4e_model@@_K@std@@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@QEAU?$pair@$$CBW4e_model@@_K@1@@Z + ??$destroy@U?$pair@$$CBW4falcon_e_model@@_K@std@@@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@QEAU?$pair@$$CBW4falcon_e_model@@_K@1@@Z + ??$destroy@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@?$_Default_allocator_traits@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@SAXAEAV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@QEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$destroy@Ullama_buffer@@@?$_Default_allocator_traits@V?$allocator@Ullama_buffer@@@std@@@std@@SAXAEAV?$allocator@Ullama_buffer@@@1@QEAUllama_buffer@@@Z + ??$destroy@Ullama_load_tensor@@@?$_Default_allocator_traits@V?$allocator@Ullama_load_tensor@@@std@@@std@@SAXAEAV?$allocator@Ullama_load_tensor@@@1@QEAUllama_load_tensor@@@Z + ??$destroy@Ullama_load_tensor_shard@@@?$_Default_allocator_traits@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@SAXAEAV?$allocator@Ullama_load_tensor_shard@@@1@QEAUllama_load_tensor_shard@@@Z + ??$destroy@Ullama_sp_bigram@@@?$_Default_allocator_traits@V?$allocator@Ullama_sp_bigram@@@std@@@std@@SAXAEAV?$allocator@Ullama_sp_bigram@@@1@QEAUllama_sp_bigram@@@Z + ??$destroy@Utoken_score@llama_vocab@@@?$_Default_allocator_traits@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@SAXAEAV?$allocator@Utoken_score@llama_vocab@@@1@QEAUtoken_score@llama_vocab@@@Z + ??$destroy@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$_Default_allocator_traits@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@SAXAEAV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$destroy@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@?$_Default_allocator_traits@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@1@QEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@1@@Z + ??$destroy@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@?$_Default_allocator_traits@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@SAXAEAV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@1@QEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@1@@Z + ??$destroy@Vthread@std@@@?$_Default_allocator_traits@V?$allocator@Vthread@std@@@std@@@std@@SAXAEAV?$allocator@Vthread@std@@@1@QEAVthread@1@@Z + ??$distance@PEAD@std@@YA_JPEAD0@Z + ??$distance@PEAI@std@@YA_JPEAI0@Z + ??$distance@PEAM@std@@YA_JPEAM0@Z + ??$distance@PEAN@std@@YA_JPEAN0@Z + ??$distance@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YA_JPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@0@Z + ??$distance@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA_JPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@0@Z + ??$distance@PEAUllama_token_data@@@std@@YA_JPEAUllama_token_data@@0@Z + ??$distance@PEBD@std@@YA_JPEBD0@Z + ??$distance@PEBH@std@@YA_JPEBH0@Z + ??$distance@PEBI@std@@YA_JPEBI0@Z + ??$distance@PEBN@std@@YA_JPEBN0@Z + ??$distance@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YA_JV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@0@Z + ??$distance@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@YA_JV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@0@Z + ??$emplace@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA?AU?$pair@V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@_N@1@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$emplace@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEA_K@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA?AU?$pair@V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@_N@1@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEA_K@Z + ??$emplace@AEBH@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA?AU?$pair@V?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@_N@1@AEBH@Z + ??$emplace@H@?$vector@HV?$allocator@H@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@1@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@1@$$QEAH@Z + ??$emplace_back@AEAPEAUfalcon_file_loader@@@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@QEAA?A_TAEAPEAUfalcon_file_loader@@@Z + ??$emplace_back@AEAPEAUllama_file_loader@@@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@QEAA?A_TAEAPEAUllama_file_loader@@@Z + ??$emplace_back@AEAUllama_sp_symbol@@@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAA?A_TAEAUllama_sp_symbol@@@Z + ??$emplace_back@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAA?A_TAEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$emplace_back@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAPEAUggml_tensor@@@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA?A_TAEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEAPEAUggml_tensor@@@Z + ??$emplace_back@AEBH@?$vector@HV?$allocator@H@std@@@std@@QEAA?A_TAEBH@Z + ??$emplace_back@AEBM@?$vector@MV?$allocator@M@std@@@std@@QEAA?A_TAEBM@Z + ??$emplace_back@AEBN@?$vector@NV?$allocator@N@std@@@std@@QEAA?A_TAEBN@Z + ??$emplace_back@AEBQEAUggml_tensor@@@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAA?A_TAEBQEAUggml_tensor@@@Z + ??$emplace_back@AEBUllama_load_tensor_shard@@@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAA?A_TAEBUllama_load_tensor_shard@@@Z + ??$emplace_back@AEBUllama_sp_bigram@@@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAA?A_TAEBUllama_sp_bigram@@@Z + ??$emplace_back@AEBUllama_token_data@@@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAA?A_TAEBUllama_token_data@@@Z + ??$emplace_back@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAA?A_TAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$emplace_back@H@?$vector@HV?$allocator@H@std@@@std@@QEAA?A_T$$QEAH@Z + ??$emplace_back@N@?$vector@NV?$allocator@N@std@@@std@@QEAA?A_T$$QEAN@Z + ??$emplace_back@U?$pair@NH@std@@@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAA?A_T$$QEAU?$pair@NH@1@@Z + ??$emplace_back@Ullama_token_data@@@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAA?A_T$$QEAUllama_token_data@@@Z + ??$emplace_back@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAA?A_T$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$emplace_back@Vthread@std@@@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAA?A_T$$QEAVthread@1@@Z + ??$end@D$0BF@@std@@YAPEADAEAY0BF@D@Z + ??$end@PEBD@std@@YAPEBQEBDV?$initializer_list@PEBD@0@@Z + ??$equal@PEBDPEBDU?$_Cmp_icase@V?$_Regex_traits@D@std@@@std@@@std@@YA_NQEBD000U?$_Cmp_icase@V?$_Regex_traits@D@std@@@0@@Z + ??$equal@PEBDPEBDU?$_Cmp_icase@V?$_Regex_traits@D@std@@@std@@@std@@YA_NQEBD00U?$_Cmp_icase@V?$_Regex_traits@D@std@@@0@@Z + ??$equal@PEBIPEBI@std@@YA_NQEBI00@Z + ??$equal@PEBIPEBIU?$equal_to@X@std@@@std@@YA_NQEBI00U?$equal_to@X@0@@Z + ??$exchange@PEAH$$T@std@@YAPEAHAEAPEAH$$QEA$$T@Z + ??$exchange@PEAI$$T@std@@YAPEAIAEAPEAI$$QEA$$T@Z + ??$exchange@PEAPEAUggml_tensor@@$$T@std@@YAPEAPEAUggml_tensor@@AEAPEAPEAU1@$$QEA$$T@Z + ??$exchange@PEAU?$_List_node@HPEAX@std@@$$T@std@@YAPEAU?$_List_node@HPEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@$$T@std@@YAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@$$T@std@@YAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@$$T@std@@YAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@$$T@std@@YAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@$$T@std@@YAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@$$T@std@@YAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@AEAPEAU12@@std@@YAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@AEAPEAU10@0@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@$$T@std@@YAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@AEAPEAU12@@std@@YAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@0@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@$$T@std@@YAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@AEAPEAU12@@std@@YAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@0@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@$$T@std@@YAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@AEAPEAU12@@std@@YAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@AEAPEAU10@0@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@$$T@std@@YAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@AEAPEAU10@$$QEA$$T@Z + ??$exchange@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@AEAPEAU12@@std@@YAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@AEAPEAU10@0@Z + ??$exchange@PEAUfalcon_file_loader@@$$T@std@@YAPEAUfalcon_file_loader@@AEAPEAU1@$$QEA$$T@Z + ??$exchange@PEAUfalcon_model_loader@@AEAPEAU1@@std@@YAPEAUfalcon_model_loader@@AEAPEAU1@0@Z + ??$exchange@PEAUllama_file_loader@@$$T@std@@YAPEAUllama_file_loader@@AEAPEAU1@$$QEA$$T@Z + ??$exchange@PEAUllama_load_tensor_shard@@$$T@std@@YAPEAUllama_load_tensor_shard@@AEAPEAU1@$$QEA$$T@Z + ??$exchange@PEAUllama_mmap@@$$T@std@@YAPEAUllama_mmap@@AEAPEAU1@$$QEA$$T@Z + ??$exchange@PEAUllama_mmap@@AEAPEAU1@@std@@YAPEAUllama_mmap@@AEAPEAU1@0@Z + ??$exchange@PEAUllama_model_loader@@AEAPEAU1@@std@@YAPEAUllama_model_loader@@AEAPEAU1@0@Z + ??$exchange@PEAV_Facet_base@std@@$$T@std@@YAPEAV_Facet_base@0@AEAPEAV10@$$QEA$$T@Z + ??$exchange@PEAV_Node_assert@std@@$$T@std@@YAPEAV_Node_assert@0@AEAPEAV10@$$QEA$$T@Z + ??$exchange@U_Thrd_t@@U1@@std@@YA?AU_Thrd_t@@AEAU1@$$QEAU1@@Z + ??$exchange@_KAEB_K@std@@YA_KAEA_KAEB_K@Z + ??$exchange@_K_K@std@@YA_KAEA_K$$QEA_K@Z + ??$fill@PEANN@std@@YAXQEAN0AEBN@Z + ??$fill@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@V12@@std@@YAXQEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@0AEBV10@@Z + ??$fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@0AEBV10@@Z + ??$fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@0AEBV10@@Z + ??$fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@0AEBV10@@Z + ??$fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@0AEBV10@@Z + ??$fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@0AEBV10@@Z + ??$fill@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@_N@std@@YAXV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@0AEB_N@Z + ??$find@PEBDE@std@@YAPEBDPEBDQEBDAEBE@Z + ??$find@PEBDW4_Meta_type@std@@@std@@YAPEBDPEBDQEBDAEBW4_Meta_type@0@@Z + ??$find@PEBHH@std@@YAPEBHPEBHQEBHAEBH@Z + ??$find@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@W4_Meta_type@2@@std@@YA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@V10@V10@AEBW4_Meta_type@0@@Z + ??$find@X@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@1@AEBH@Z + ??$find@X@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEBA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$find@X@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$find@X@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$find@X@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEBA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??$forward@AEAD@std@@YAAEADAEAD@Z + ??$forward@AEAH@std@@YAAEAHAEAH@Z + ??$forward@AEAI@std@@YAAEAIAEAI@Z + ??$forward@AEAM@std@@YAAEAMAEAM@Z + ??$forward@AEAN@std@@YAAEANAEAN@Z + ??$forward@AEAPEAD@std@@YAAEAPEADAEAPEAD@Z + ??$forward@AEAPEAU?$_List_node@HPEAX@std@@@std@@YAAEAPEAU?$_List_node@HPEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@YAAEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@YAAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@YAAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@YAAEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAAEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAAEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@YAAEAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@YAAEAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@AEAPEAUfalcon_file_loader@@@std@@YAAEAPEAUfalcon_file_loader@@AEAPEAU1@@Z + ??$forward@AEAPEAUfalcon_model_loader@@@std@@YAAEAPEAUfalcon_model_loader@@AEAPEAU1@@Z + ??$forward@AEAPEAUggml_tensor@@@std@@YAAEAPEAUggml_tensor@@AEAPEAU1@@Z + ??$forward@AEAPEAUllama_file_loader@@@std@@YAAEAPEAUllama_file_loader@@AEAPEAU1@@Z + ??$forward@AEAPEAUllama_model_loader@@@std@@YAAEAPEAUllama_model_loader@@AEAPEAU1@@Z + ??$forward@AEAPEAUllama_token_data@@@std@@YAAEAPEAUllama_token_data@@AEAPEAU1@@Z + ??$forward@AEAPEAV_Facet_base@std@@@std@@YAAEAPEAV_Facet_base@0@AEAPEAV10@@Z + ??$forward@AEAPEAV_Node_assert@std@@@std@@YAAEAPEAV_Node_assert@0@AEAPEAV10@@Z + ??$forward@AEAPEA_K@std@@YAAEAPEA_KAEAPEA_K@Z + ??$forward@AEAPEA_W@std@@YAAEAPEA_WAEAPEA_W@Z + ??$forward@AEAPEBN@std@@YAAEAPEBNAEAPEBN@Z + ??$forward@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@YAAEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@AEAU10@@Z + ??$forward@AEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YAAEAU_Grp_t@?$_Tgt_state_t@PEBD@0@AEAU120@@Z + ??$forward@AEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YAAEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@AEAU120@@Z + ??$forward@AEAUllama_sp_symbol@@@std@@YAAEAUllama_sp_symbol@@AEAU1@@Z + ??$forward@AEAV?$allocator@D@std@@@std@@YAAEAV?$allocator@D@0@AEAV10@@Z + ??$forward@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YAAEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEAV10@@Z + ??$forward@AEAV?$vector@HV?$allocator@H@std@@@std@@@std@@YAAEAV?$vector@HV?$allocator@H@std@@@0@AEAV10@@Z + ??$forward@AEA_K@std@@YAAEA_KAEA_K@Z + ??$forward@AEBD@std@@YAAEBDAEBD@Z + ??$forward@AEBH@std@@YAAEBHAEBH@Z + ??$forward@AEBI@std@@YAAEBIAEBI@Z + ??$forward@AEBM@std@@YAAEBMAEBM@Z + ??$forward@AEBN@std@@YAAEBNAEBN@Z + ??$forward@AEBQEAD@std@@YAAEBQEADAEBQEAD@Z + ??$forward@AEBQEAU?$_List_node@HPEAX@std@@@std@@YAAEBQEAU?$_List_node@HPEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@YAAEBQEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAAEBQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAAEBQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@YAAEBQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@YAAEBQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@YAAEBQEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YAAEBQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YAAEBQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@YAAEBQEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@YAAEBQEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@0@AEBQEAU10@@Z + ??$forward@AEBQEAUggml_tensor@@@std@@YAAEBQEAUggml_tensor@@AEBQEAU1@@Z + ??$forward@AEBQEA_W@std@@YAAEBQEA_WAEBQEA_W@Z + ??$forward@AEBU?$less@H@std@@@std@@YAAEBU?$less@H@0@AEBU10@@Z + ??$forward@AEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YAAEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEBU10@@Z + ??$forward@AEBU?$less@W4e_model@@@std@@@std@@YAAEBU?$less@W4e_model@@@0@AEBU10@@Z + ??$forward@AEBU?$less@W4falcon_e_model@@@std@@@std@@YAAEBU?$less@W4falcon_e_model@@@0@AEBU10@@Z + ??$forward@AEBU?$pair@$$CBW4e_model@@_K@std@@@std@@YAAEBU?$pair@$$CBW4e_model@@_K@0@AEBU10@@Z + ??$forward@AEBU?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@YAAEBU?$pair@$$CBW4falcon_e_model@@_K@0@AEBU10@@Z + ??$forward@AEBUllama_load_tensor_shard@@@std@@YAAEBUllama_load_tensor_shard@@AEBU1@@Z + ??$forward@AEBUllama_sp_bigram@@@std@@YAAEBUllama_sp_bigram@@AEBU1@@Z + ??$forward@AEBUllama_token_data@@@std@@YAAEBUllama_token_data@@AEBU1@@Z + ??$forward@AEBUpiecewise_construct_t@std@@@std@@YAAEBUpiecewise_construct_t@0@AEBU10@@Z + ??$forward@AEBV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YAAEBV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@AEBV10@@Z + ??$forward@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@YAAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@AEBV10@@Z + ??$forward@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@YAAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@AEBV10@@Z + ??$forward@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@YAAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@AEBV10@@Z + ??$forward@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@YAAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@AEBV10@@Z + ??$forward@AEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@YAAEBV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@AEBV10@@Z + ??$forward@AEBV?$allocator@D@std@@@std@@YAAEBV?$allocator@D@0@AEBV10@@Z + ??$forward@AEBV?$allocator@E@std@@@std@@YAAEBV?$allocator@E@0@AEBV10@@Z + ??$forward@AEBV?$allocator@H@std@@@std@@YAAEBV?$allocator@H@0@AEBV10@@Z + ??$forward@AEBV?$allocator@I@std@@@std@@YAAEBV?$allocator@I@0@AEBV10@@Z + ??$forward@AEBV?$allocator@M@std@@@std@@YAAEBV?$allocator@M@0@AEBV10@@Z + ??$forward@AEBV?$allocator@N@std@@@std@@YAAEBV?$allocator@N@0@AEBV10@@Z + ??$forward@AEBV?$allocator@U?$pair@$$CBHH@std@@@std@@@std@@YAAEBV?$allocator@U?$pair@$$CBHH@std@@@0@AEBV10@@Z + ??$forward@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@YAAEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@0@AEBV10@@Z + ??$forward@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@YAAEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@0@AEBV10@@Z + ??$forward@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@YAAEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@0@AEBV10@@Z + ??$forward@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@YAAEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@0@AEBV10@@Z + ??$forward@AEBV?$allocator@Ullama_buffer@@@std@@@std@@YAAEBV?$allocator@Ullama_buffer@@@0@AEBV10@@Z + ??$forward@AEBV?$allocator@_J@std@@@std@@YAAEBV?$allocator@_J@0@AEBV10@@Z + ??$forward@AEBV?$allocator@_K@std@@@std@@YAAEBV?$allocator@_K@0@AEBV10@@Z + ??$forward@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEBV10@@Z + ??$forward@AEBW4ggml_type@@@std@@YAAEBW4ggml_type@@AEBW41@@Z + ??$forward@AEB_J@std@@YAAEB_JAEB_J@Z + ??$forward@D@std@@YA$$QEADAEAD@Z + ??$forward@H@std@@YA$$QEAHAEAH@Z + ??$forward@I@std@@YA$$QEAIAEAI@Z + ??$forward@M@std@@YA$$QEAMAEAM@Z + ??$forward@N@std@@YA$$QEANAEAN@Z + ??$forward@PEAE@std@@YA$$QEAPEAEAEAPEAE@Z + ??$forward@PEAH@std@@YA$$QEAPEAHAEAPEAH@Z + ??$forward@PEAI@std@@YA$$QEAPEAIAEAPEAI@Z + ??$forward@PEAM@std@@YA$$QEAPEAMAEAPEAM@Z + ??$forward@PEAPEAUggml_tensor@@@std@@YA$$QEAPEAPEAUggml_tensor@@AEAPEAPEAU1@@Z + ??$forward@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@0@AEAPEAU10@@Z + ??$forward@PEAUfalcon_file_loader@@@std@@YA$$QEAPEAUfalcon_file_loader@@AEAPEAU1@@Z + ??$forward@PEAUggml_tensor@@@std@@YA$$QEAPEAUggml_tensor@@AEAPEAU1@@Z + ??$forward@PEAUllama_file_loader@@@std@@YA$$QEAPEAUllama_file_loader@@AEAPEAU1@@Z + ??$forward@PEAUllama_load_tensor_shard@@@std@@YA$$QEAPEAUllama_load_tensor_shard@@AEAPEAU1@@Z + ??$forward@PEAUllama_token_data@@@std@@YA$$QEAPEAUllama_token_data@@AEAPEAU1@@Z + ??$forward@PEAV_Node_assert@std@@@std@@YA$$QEAPEAV_Node_assert@0@AEAPEAV10@@Z + ??$forward@PEBD@std@@YA$$QEAPEBDAEAPEBD@Z + ??$forward@PEBH@std@@YA$$QEAPEBHAEAPEBH@Z + ??$forward@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@YA$$QEAU?$default_delete@Ufalcon_file_loader@@@0@AEAU10@@Z + ??$forward@U?$default_delete@Ullama_file_loader@@@std@@@std@@YA$$QEAU?$default_delete@Ullama_file_loader@@@0@AEAU10@@Z + ??$forward@U?$default_delete@Ullama_mmap@@@std@@@std@@YA$$QEAU?$default_delete@Ullama_mmap@@@0@AEAU10@@Z + ??$forward@U?$default_delete@V_Node_assert@std@@@std@@@std@@YA$$QEAU?$default_delete@V_Node_assert@std@@@0@AEAU10@@Z + ??$forward@U?$pair@NH@std@@@std@@YA$$QEAU?$pair@NH@0@AEAU10@@Z + ??$forward@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@YA$$QEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@AEAU10@@Z + ??$forward@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YA$$QEAU_Grp_t@?$_Tgt_state_t@PEBD@0@AEAU120@@Z + ??$forward@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA$$QEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@AEAU120@@Z + ??$forward@U_Loop_vals_t@std@@@std@@YA$$QEAU_Loop_vals_t@0@AEAU10@@Z + ??$forward@U_Zero_then_variadic_args_t@std@@@std@@YA$$QEAU_Zero_then_variadic_args_t@0@AEAU10@@Z + ??$forward@Udollyv2_layer@@@std@@YA$$QEAUdollyv2_layer@@AEAU1@@Z + ??$forward@Ufalcon_layer@@@std@@YA$$QEAUfalcon_layer@@AEAU1@@Z + ??$forward@Ugpt2_layer@@@std@@YA$$QEAUgpt2_layer@@AEAU1@@Z + ??$forward@Ugpt_neox_layer@@@std@@YA$$QEAUgpt_neox_layer@@AEAU1@@Z + ??$forward@Ugptj_layer@@@std@@YA$$QEAUgptj_layer@@AEAU1@@Z + ??$forward@Ullama_layer@@@std@@YA$$QEAUllama_layer@@AEAU1@@Z + ??$forward@Ullama_load_tensor@@@std@@YA$$QEAUllama_load_tensor@@AEAU1@@Z + ??$forward@Ullama_load_tensor_shard@@@std@@YA$$QEAUllama_load_tensor_shard@@AEAU1@@Z + ??$forward@Ullama_sp_bigram@@@std@@YA$$QEAUllama_sp_bigram@@AEAU1@@Z + ??$forward@Ullama_sp_symbol@@@std@@YA$$QEAUllama_sp_symbol@@AEAU1@@Z + ??$forward@Ullama_token_data@@@std@@YA$$QEAUllama_token_data@@AEAU1@@Z + ??$forward@Umpt_layer@@@std@@YA$$QEAUmpt_layer@@AEAU1@@Z + ??$forward@Ureplit_layer@@@std@@YA$$QEAUreplit_layer@@AEAU1@@Z + ??$forward@Ustarcoder_layer@@@std@@YA$$QEAUstarcoder_layer@@AEAU1@@Z + ??$forward@Utoken_score@llama_vocab@@@std@@YA$$QEAUtoken_score@llama_vocab@@AEAU12@@Z + ??$forward@V?$_List_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@@std@@YA$$QEAV?$_List_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@0@AEAV10@@Z + ??$forward@V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@YA$$QEAV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@AEAV10@@Z + ??$forward@V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@YA$$QEAV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@AEAV10@@Z + ??$forward@V?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@@std@@YA$$QEAV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@0@AEAV10@@Z + ??$forward@V?$allocator@D@std@@@std@@YA$$QEAV?$allocator@D@0@AEAV10@@Z + ??$forward@V?$allocator@H@std@@@std@@YA$$QEAV?$allocator@H@0@AEAV10@@Z + ??$forward@V?$allocator@I@std@@@std@@YA$$QEAV?$allocator@I@0@AEAV10@@Z + ??$forward@V?$allocator@PEAUggml_tensor@@@std@@@std@@YA$$QEAV?$allocator@PEAUggml_tensor@@@0@AEAV10@@Z + ??$forward@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@YA$$QEAV?$allocator@U?$_List_node@HPEAX@std@@@0@AEAV10@@Z + ??$forward@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@YA$$QEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@0@AEAV10@@Z + ??$forward@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@YA$$QEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@0@AEAV10@@Z + ??$forward@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@YA$$QEAV?$allocator@Ullama_load_tensor_shard@@@0@AEAV10@@Z + ??$forward@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@YA$$QEAV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@0@AEAV10@@Z + ??$forward@V?$allocator@_W@std@@@std@@YA$$QEAV?$allocator@_W@0@AEAV10@@Z + ??$forward@V?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YA$$QEAV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEAV10@@Z + ??$forward@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YA$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEAV10@@Z + ??$forward@V?$sub_match@PEBD@std@@@std@@YA$$QEAV?$sub_match@PEBD@0@AEAV10@@Z + ??$forward@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA$$QEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@AEAV10@@Z + ??$forward@V?$tuple@$$QEAH@std@@@std@@YA$$QEAV?$tuple@$$QEAH@0@AEAV10@@Z + ??$forward@V?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YA$$QEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEAV10@@Z + ??$forward@V?$tuple@$$V@std@@@std@@YA$$QEAV?$tuple@$$V@0@AEAV10@@Z + ??$forward@V?$tuple@AEBH@std@@@std@@YA$$QEAV?$tuple@AEBH@0@AEAV10@@Z + ??$forward@V?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YA$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEAV10@@Z + ??$forward@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@YA$$QEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@0@AEAV10@@Z + ??$forward@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@YA$$QEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@0@AEAV10@@Z + ??$forward@V?$vector@HV?$allocator@H@std@@@std@@@std@@YA$$QEAV?$vector@HV?$allocator@H@std@@@0@AEAV10@@Z + ??$forward@Vthread@std@@@std@@YA$$QEAVthread@0@AEAV10@@Z + ??$forward@W4_Node_type@std@@@std@@YA$$QEAW4_Node_type@0@AEAW410@@Z + ??$forward@W4e_model@@@std@@YA$$QEAW4e_model@@AEAW41@@Z + ??$forward@W4falcon_e_model@@@std@@YA$$QEAW4falcon_e_model@@AEAW41@@Z + ??$forward@_J@std@@YA$$QEA_JAEA_J@Z + ??$forward@_K@std@@YA$$QEA_KAEA_K@Z + ??$forward@_N@std@@YA$$QEA_NAEA_N@Z + ??$forward_as_tuple@$$V@std@@YA?AV?$tuple@$$V@0@XZ + ??$forward_as_tuple@AEBH@std@@YA?AV?$tuple@AEBH@0@AEBH@Z + ??$forward_as_tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YA?AV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@@Z + ??$forward_as_tuple@H@std@@YA?AV?$tuple@$$QEAH@0@$$QEAH@Z + ??$forward_as_tuple@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YA?AV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@@Z + ??$generate_canonical@N$0?0V?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@std@@@std@@YANAEAV?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@0@@Z + ??$insert@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@$0A@@?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA?AU?$pair@V?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@_N@1@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@@Z + ??$insert@PEBU?$pair@$$CBW4e_model@@_K@std@@@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@QEAAXPEBU?$pair@$$CBW4e_model@@_K@1@0@Z + ??$insert@PEBU?$pair@$$CBW4falcon_e_model@@_K@std@@@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@QEAAXPEBU?$pair@$$CBW4falcon_e_model@@_K@1@0@Z + ??$insert@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAAXV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@1@0@Z + ??$iota@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@std@@H@std@@YAXV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@0@0H@Z + ??$iter_swap@PEAUllama_token_data@@PEAU1@@std@@YAXPEAUllama_token_data@@0@Z + ??$iter_swap@PEA_KPEA_K@std@@YAXPEA_K0@Z + ??$lookup_classname@PEBD@?$_Regex_traits@D@std@@QEBAFPEBD0_N@Z + ??$lower_bound@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@N@std@@YA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@V10@0AEBN@Z + ??$lower_bound@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@NU?$less@X@2@@std@@YA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@0@V10@V10@AEBNU?$less@X@0@@Z + ??$make_pair@AEAV?$vector@HV?$allocator@H@std@@@std@@AEAM@std@@YA?AU?$pair@V?$vector@HV?$allocator@H@std@@@std@@M@0@AEAV?$vector@HV?$allocator@H@std@@@0@AEAM@Z + ??$make_pair@AEA_KM@std@@YA?AU?$pair@_KM@0@AEA_K$$QEAM@Z + ??$make_pair@NAEAH@std@@YA?AU?$pair@NH@0@$$QEANAEAH@Z + ??$make_pair@V?$vector@HV?$allocator@H@std@@@std@@M@std@@YA?AU?$pair@V?$vector@HV?$allocator@H@std@@@std@@M@0@$$QEAV?$vector@HV?$allocator@H@std@@@0@$$QEAM@Z + ??$make_unique@V_Node_assert@std@@W4_Node_type@2@$0A@@std@@YA?AV?$unique_ptr@V_Node_assert@std@@U?$default_delete@V_Node_assert@std@@@2@@0@$$QEAW4_Node_type@0@@Z + ??$max@H@std@@YAAEBHAEBH0@Z + ??$max@N@std@@YAAEBNAEBN0@Z + ??$max@PEAD@std@@YAAEBQEADAEBQEAD0@Z + ??$max@_K@std@@YAAEB_KAEB_K0@Z + ??$min@H@std@@YAAEBHAEBH0@Z + ??$min@_J@std@@YAAEB_JAEB_J0@Z + ??$min@_K@std@@YAAEB_KAEB_K0@Z + ??$move@AEAD@std@@YA$$QEADAEAD@Z + ??$move@AEAH@std@@YA$$QEAHAEAH@Z + ??$move@AEAI@std@@YA$$QEAIAEAI@Z + ??$move@AEAM@std@@YA$$QEAMAEAM@Z + ??$move@AEAN@std@@YA$$QEANAEAN@Z + ??$move@AEAPEAE@std@@YA$$QEAPEAEAEAPEAE@Z + ??$move@AEAPEAM@std@@YA$$QEAPEAMAEAPEAM@Z + ??$move@AEAPEAU?$_List_node@HPEAX@std@@@std@@YA$$QEAPEAU?$_List_node@HPEAX@0@AEAPEAU10@@Z + ??$move@AEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@YA$$QEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@AEAPEAU10@@Z + ??$move@AEAPEAUggml_tensor@@@std@@YA$$QEAPEAUggml_tensor@@AEAPEAU1@@Z + ??$move@AEAPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@YA$$QEAPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@AEAPEAV10@@Z + ??$move@AEAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@YA$$QEAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@AEAPEAV10@@Z + ??$move@AEAPEAV_Node_base@std@@@std@@YA$$QEAPEAV_Node_base@0@AEAPEAV10@@Z + ??$move@AEAU?$pair@NH@std@@@std@@YA$$QEAU?$pair@NH@0@AEAU10@@Z + ??$move@AEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@YA$$QEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@0@AEAU10@@Z + ??$move@AEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YA$$QEAU_Grp_t@?$_Tgt_state_t@PEBD@0@AEAU120@@Z + ??$move@AEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA$$QEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@AEAU120@@Z + ??$move@AEAU_Loop_vals_t@std@@@std@@YA$$QEAU_Loop_vals_t@0@AEAU10@@Z + ??$move@AEAUdollyv2_layer@@@std@@YA$$QEAUdollyv2_layer@@AEAU1@@Z + ??$move@AEAUfalcon_layer@@@std@@YA$$QEAUfalcon_layer@@AEAU1@@Z + ??$move@AEAUgpt2_layer@@@std@@YA$$QEAUgpt2_layer@@AEAU1@@Z + ??$move@AEAUgpt_neox_layer@@@std@@YA$$QEAUgpt_neox_layer@@AEAU1@@Z + ??$move@AEAUgptj_layer@@@std@@YA$$QEAUgptj_layer@@AEAU1@@Z + ??$move@AEAUllama_layer@@@std@@YA$$QEAUllama_layer@@AEAU1@@Z + ??$move@AEAUllama_load_tensor@@@std@@YA$$QEAUllama_load_tensor@@AEAU1@@Z + ??$move@AEAUllama_load_tensor_shard@@@std@@YA$$QEAUllama_load_tensor_shard@@AEAU1@@Z + ??$move@AEAUllama_sp_bigram@@@std@@YA$$QEAUllama_sp_bigram@@AEAU1@@Z + ??$move@AEAUllama_sp_symbol@@@std@@YA$$QEAUllama_sp_symbol@@AEAU1@@Z + ??$move@AEAUllama_token_data@@@std@@YA$$QEAUllama_token_data@@AEAU1@@Z + ??$move@AEAUllama_vocab@@@std@@YA$$QEAUllama_vocab@@AEAU1@@Z + ??$move@AEAUmpt_layer@@@std@@YA$$QEAUmpt_layer@@AEAU1@@Z + ??$move@AEAUreplit_layer@@@std@@YA$$QEAUreplit_layer@@AEAU1@@Z + ??$move@AEAUstarcoder_layer@@@std@@YA$$QEAUstarcoder_layer@@AEAU1@@Z + ??$move@AEAUtoken_score@llama_vocab@@@std@@YA$$QEAUtoken_score@llama_vocab@@AEAU12@@Z + ??$move@AEAV?$allocator@D@std@@@std@@YA$$QEAV?$allocator@D@0@AEAV10@@Z + ??$move@AEAV?$allocator@H@std@@@std@@YA$$QEAV?$allocator@H@0@AEAV10@@Z + ??$move@AEAV?$allocator@I@std@@@std@@YA$$QEAV?$allocator@I@0@AEAV10@@Z + ??$move@AEAV?$allocator@PEAUggml_tensor@@@std@@@std@@YA$$QEAV?$allocator@PEAUggml_tensor@@@0@AEAV10@@Z + ??$move@AEAV?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@YA$$QEAV?$allocator@U?$_List_node@HPEAX@std@@@0@AEAV10@@Z + ??$move@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@YA$$QEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@0@AEAV10@@Z + ??$move@AEAV?$allocator@Ullama_load_tensor_shard@@@std@@@std@@YA$$QEAV?$allocator@Ullama_load_tensor_shard@@@0@AEAV10@@Z + ??$move@AEAV?$allocator@Utoken_score@llama_vocab@@@std@@@std@@YA$$QEAV?$allocator@Utoken_score@llama_vocab@@@0@AEAV10@@Z + ??$move@AEAV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@YA$$QEAV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@0@AEAV10@@Z + ??$move@AEAV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@YA$$QEAV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@0@AEAV10@@Z + ??$move@AEAV?$allocator@_W@std@@@std@@YA$$QEAV?$allocator@_W@0@AEAV10@@Z + ??$move@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@YA$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEAV10@@Z + ??$move@AEAV?$sub_match@PEBD@std@@@std@@YA$$QEAV?$sub_match@PEBD@0@AEAV10@@Z + ??$move@AEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YA$$QEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@AEAV10@@Z + ??$move@AEAV?$tuple@$$QEAH@std@@@std@@YA$$QEAV?$tuple@$$QEAH@0@AEAV10@@Z + ??$move@AEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YA$$QEAV?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEAV10@@Z + ??$move@AEAV?$tuple@AEBH@std@@@std@@YA$$QEAV?$tuple@AEBH@0@AEAV10@@Z + ??$move@AEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@YA$$QEAV?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@AEAV10@@Z + ??$move@AEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@YA$$QEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@0@AEAV10@@Z + ??$move@AEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@YA$$QEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@0@AEAV10@@Z + ??$move@AEAV?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@std@@@std@@YA$$QEAV?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@0@AEAV10@@Z + ??$move@AEAV?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@@std@@YA$$QEAV?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@0@AEAV10@@Z + ??$move@AEAV?$unordered_set@HU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@H@2@@std@@@std@@YA$$QEAV?$unordered_set@HU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@H@2@@0@AEAV10@@Z + ??$move@AEAVthread@std@@@std@@YA$$QEAVthread@0@AEAV10@@Z + ??$move@AEAW4ggml_type@@@std@@YA$$QEAW4ggml_type@@AEAW41@@Z + ??$move@AEA_J@std@@YA$$QEA_JAEA_J@Z + ??$move@AEA_K@std@@YA$$QEA_KAEA_K@Z + ??$next@PEAI@std@@YAPEAIPEAI_J@Z + ??$next@PEAU_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@PEBD@0@PEAU120@_J@Z + ??$next@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@YAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@0@PEAU120@_J@Z + ??$next@PEBI@std@@YAPEBIPEBI_J@Z + ??$next@PEBN@std@@YAPEBNPEBN_J@Z + ??$pop_heap@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@std@@Ucomparator@llama_sp_bigram@@@std@@YAXV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@0@0Ucomparator@llama_sp_bigram@@@Z + ??$push_heap@V?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@std@@Ucomparator@llama_sp_bigram@@@std@@YAXV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@0@0Ucomparator@llama_sp_bigram@@@Z + ??$regex_replace@V?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@2@V?$regex_traits@D@2@DU?$char_traits@D@2@V?$allocator@D@2@@std@@YA?AV?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@0@V10@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@0@1AEBV?$basic_regex@DV?$regex_traits@D@std@@@0@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@W4match_flag_type@regex_constants@0@@Z + ??$regex_replace@V?$regex_traits@D@std@@DU?$char_traits@D@2@V?$allocator@D@2@@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEBV10@AEBV?$basic_regex@DV?$regex_traits@D@std@@@0@PEBDW4match_flag_type@regex_constants@0@@Z + ??$regex_search@U?$char_traits@D@std@@V?$allocator@D@2@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@DV?$regex_traits@D@2@@std@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@0@AEAV?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@0@AEBV?$basic_regex@DV?$regex_traits@D@std@@@0@W4match_flag_type@regex_constants@0@@Z + ??$seed@V?$_Wrap_istream@DU?$char_traits@D@std@@I@std@@$0A@@?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@QEAAXAEAV?$_Wrap_istream@DU?$char_traits@D@std@@I@1@_N@Z + ??$swap@PEAU?$_List_node@HPEAX@std@@$0A@@std@@YAXAEAPEAU?$_List_node@HPEAX@0@0@Z + ??$swap@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@$0A@@std@@YAXAEAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@0@0@Z + ??$swap@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@$0A@@std@@YAXAEAPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@0@Z + ??$swap@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@$0A@@std@@YAXAEAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@0@Z + ??$swap@PEAV_Node_base@std@@$0A@@std@@YAXAEAPEAV_Node_base@0@0@Z + ??$swap@Ullama_token_data@@$0A@@std@@YAXAEAUllama_token_data@@0@Z + ??$swap@_K$0A@@std@@YAXAEA_K0@Z + ??$transform_primary@PEAD@?$_Regex_traits@D@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@PEAD0@Z + ??$transform_primary@PEBD@?$_Regex_traits@D@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@PEBD0@Z + ??$transform_primary@V?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@?$_Regex_traits@D@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@V?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@1@0@Z + ??$uninitialized_fill@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@V12@@std@@YAXQEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@0@0AEBV10@@Z + ??$uninitialized_fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@0@0AEBV10@@Z + ??$uninitialized_fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@0@0AEBV10@@Z + ??$uninitialized_fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@0@0AEBV10@@Z + ??$uninitialized_fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@0@0AEBV10@@Z + ??$uninitialized_fill@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@V12@@std@@YAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@0@0AEBV10@@Z + ??$use_facet@V?$codecvt@DDU_Mbstatet@@@std@@@std@@YAAEBV?$codecvt@DDU_Mbstatet@@@0@AEBVlocale@0@@Z + ??$use_facet@V?$collate@D@std@@@std@@YAAEBV?$collate@D@0@AEBVlocale@0@@Z + ??$use_facet@V?$ctype@D@std@@@std@@YAAEBV?$ctype@D@0@AEBVlocale@0@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@HPEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@@Z + ??0?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@@Z + ??0?$_Bt_state_t@PEBD@std@@QEAA@AEBV01@@Z + ??0?$_Bt_state_t@PEBD@std@@QEAA@XZ + ??0?$_Bt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAA@AEBV01@@Z + ??0?$_Bt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Buf@D@std@@QEAA@XZ + ??0?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAA@AEBV?$regex_traits@D@1@W4syntax_option_type@regex_constants@1@@Z + ??0?$_Cmp_collate@V?$regex_traits@D@std@@@std@@QEAA@AEBV?$regex_traits@D@1@@Z + ??0?$_Cmp_icase@V?$_Regex_traits@D@std@@@std@@QEAA@AEBV?$_Regex_traits@D@1@@Z + ??0?$_Cmp_icase@V?$regex_traits@D@std@@@std@@QEAA@AEBV?$regex_traits@D@1@@Z + ??0?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEAA@AEBV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@1@AEBV?$allocator@U?$pair@$$CBHH@std@@@1@@Z + ??0?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAA@AEBV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@1@@Z + ??0?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAA@AEBV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@@Z + ??0?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEAA@AEBV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@1@@Z + ??0?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEAA@AEBV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@1@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@1@@Z + ??0?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEAA@$$QEAV01@@Z + ??0?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEAA@AEBV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@1@AEBV?$allocator@H@1@@Z + ??0?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@QEAA@PEAU?$_List_node@HPEAX@1@PEBV?$_List_val@U?$_List_simple_types@H@std@@@1@@Z + ??0?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@1@@Z + ??0?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@1@@Z + ??0?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@1@@Z + ??0?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@1@@Z + ??0?$_List_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@QEAA@PEAU?$_List_node@HPEAX@1@PEBV?$_List_val@U?$_List_simple_types@H@std@@@1@@Z + ??0?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@1@@Z + ??0?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@1@@Z + ??0?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@1@@Z + ??0?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_List_node@HPEAX@1@PEBV?$_List_val@U?$_List_simple_types@H@std@@@1@@Z + ??0?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@1@@Z + ??0?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@1@@Z + ??0?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@1@@Z + ??0?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@1@@Z + ??0?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@1@@Z + ??0?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@QEAA@PEAU?$_List_node@HPEAX@1@PEBV?$_List_val@U?$_List_simple_types@H@std@@@1@@Z + ??0?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@1@@Z + ??0?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@1@@Z + ??0?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@1@@Z + ??0?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@1@@Z + ??0?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEAA@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@1@PEBV?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@1@@Z + ??0?$_List_val@U?$_List_simple_types@H@std@@@std@@QEAA@XZ + ??0?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@QEAA@XZ + ??0?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@QEAA@XZ + ??0?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAA@XZ + ??0?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@QEAA@XZ + ??0?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@QEAA@XZ + ??0?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@QEAA@PEBD0AEBV?$regex_traits@D@1@PEAV_Root_node@1@IW4syntax_option_type@regex_constants@1@W4match_flag_type@51@@Z + ??0?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@QEAA@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@1@0AEBV?$regex_traits@D@1@PEAV_Root_node@1@IW4syntax_option_type@regex_constants@1@W4match_flag_type@61@@Z + ??0?$_Node_class@DV?$regex_traits@D@std@@@std@@QEAA@W4_Node_type@1@W4_Node_flags@1@@Z + ??0?$_Node_str@D@std@@QEAA@W4_Node_flags@1@@Z + ??0?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@QEAA@AEBV?$regex_traits@D@1@PEBD1W4syntax_option_type@regex_constants@1@@Z + ??0?$_Regex_traits@D@std@@QEAA@XZ + ??0?$_Sequence@D@std@@QEAA@I@Z + ??0?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAA@PEBDPEBU_Container_base0@1@@Z + ??0?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAA@XZ + ??0?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAA@PEBDPEBU_Container_base0@1@@Z + ??0?$_String_val@U?$_Simple_types@D@std@@@std@@QEAA@XZ + ??0?$_String_val@U?$_Simple_types@_W@std@@@std@@QEAA@XZ + ??0?$_Tgt_state_t@PEBD@std@@QEAA@AEBV01@@Z + ??0?$_Tgt_state_t@PEBD@std@@QEAA@XZ + ??0?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAA@AEBV01@@Z + ??0?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@QEAA@AEBU?$less@H@1@@Z + ??0?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA@AEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@@Z + ??0?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA@AEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@@Z + ??0?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@QEAA@AEBU?$less@W4e_model@@@1@@Z + ??0?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@QEAA@AEBU?$less@W4falcon_e_model@@@1@@Z + ??0?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@1@@Z + ??0?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@1@@Z + ??0?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@1@@Z + ??0?$_Tree_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@1@@Z + ??0?$_Tree_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@1@@Z + ??0?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@1@@Z + ??0?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@1@@Z + ??0?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@1@@Z + ??0?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@1@@Z + ??0?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@AEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@1@@Z + ??0?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@1@@Z + ??0?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@1@@Z + ??0?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@1@@Z + ??0?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@1@@Z + ??0?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAA@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@1@PEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@1@@Z + ??0?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@QEAA@XZ + ??0?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@QEAA@XZ + ??0?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@QEAA@XZ + ??0?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@QEAA@XZ + ??0?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAA@XZ + ??0?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@QEAA@AEBV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@1@@Z + ??0?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@QEAA@AEBV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@1@@Z + ??0?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@QEAA@AEBV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@1@@Z + ??0?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@QEAA@AEBV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@1@@Z + ??0?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@QEAA@AEBV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@1@@Z + ??0?$_Uninitialized_backout@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@QEAA@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@1@@Z + ??0?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@QEAA@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@QEAA@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@QEAA@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@QEAA@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@QEAA@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@D@std@@@std@@QEAA@PEADAEAV?$allocator@D@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@E@std@@@std@@QEAA@PEAEAEAV?$allocator@E@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@H@std@@@std@@QEAA@PEAHAEAV?$allocator@H@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@I@std@@@std@@QEAA@PEAIAEAV?$allocator@I@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@M@std@@@std@@QEAA@PEAMAEAV?$allocator@M@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@N@std@@@std@@QEAA@PEANAEAV?$allocator@N@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAA@PEAPEAUggml_tensor@@AEAV?$allocator@PEAUggml_tensor@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@U?$pair@NH@std@@@std@@@std@@QEAA@PEAU?$pair@NH@1@AEAV?$allocator@U?$pair@NH@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAA@PEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@AEAV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@QEAA@PEAU_Grp_t@?$_Tgt_state_t@PEBD@1@AEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAA@PEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@AEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@QEAA@PEAU_Loop_vals_t@1@AEAV?$allocator@U_Loop_vals_t@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAA@PEAUdollyv2_layer@@AEAV?$allocator@Udollyv2_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAA@PEAUfalcon_layer@@AEAV?$allocator@Ufalcon_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAA@PEAUgpt2_layer@@AEAV?$allocator@Ugpt2_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAA@PEAUgpt_neox_layer@@AEAV?$allocator@Ugpt_neox_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ugptj_layer@@@std@@@std@@QEAA@PEAUgptj_layer@@AEAV?$allocator@Ugptj_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ullama_buffer@@@std@@@std@@QEAA@PEAUllama_buffer@@AEAV?$allocator@Ullama_buffer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ullama_layer@@@std@@@std@@QEAA@PEAUllama_layer@@AEAV?$allocator@Ullama_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAA@PEAUllama_load_tensor@@AEAV?$allocator@Ullama_load_tensor@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAA@PEAUllama_load_tensor_shard@@AEAV?$allocator@Ullama_load_tensor_shard@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAA@PEAUllama_sp_bigram@@AEAV?$allocator@Ullama_sp_bigram@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAA@PEAUllama_sp_symbol@@AEAV?$allocator@Ullama_sp_symbol@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ullama_token_data@@@std@@@std@@QEAA@PEAUllama_token_data@@AEAV?$allocator@Ullama_token_data@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Umpt_layer@@@std@@@std@@QEAA@PEAUmpt_layer@@AEAV?$allocator@Umpt_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ureplit_layer@@@std@@@std@@QEAA@PEAUreplit_layer@@AEAV?$allocator@Ureplit_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAA@PEAUstarcoder_layer@@AEAV?$allocator@Ustarcoder_layer@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAA@PEAUtoken_score@llama_vocab@@AEAV?$allocator@Utoken_score@llama_vocab@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@QEAA@PEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEAV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAA@PEAV?$sub_match@PEBD@1@AEAV?$allocator@V?$sub_match@PEBD@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAA@PEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@AEAV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@QEAA@PEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@1@AEAV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@QEAA@PEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@1@AEAV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@Vthread@std@@@std@@@std@@QEAA@PEAVthread@1@AEAV?$allocator@Vthread@std@@@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@_J@std@@@std@@QEAA@PEA_JAEAV?$allocator@_J@1@@Z + ??0?$_Uninitialized_backout_al@V?$allocator@_K@std@@@std@@QEAA@PEA_KAEAV?$allocator@_K@1@@Z + ??0?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@QEAA@AEBV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@1@@Z + ??0?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAA@PEBIPEBU_Container_base0@1@@Z + ??0?$_Vb_iter_base@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAA@PEBI_KPEBU_Container_base0@1@@Z + ??0?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAA@PEBIPEBU_Container_base0@1@@Z + ??0?$_Vb_reference@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAA@AEBV?$_Vb_iter_base@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@1@@Z + ??0?$_Vb_val@V?$allocator@_N@std@@@std@@QEAA@AEBV01@@Z + ??0?$_Vb_val@V?$allocator@_N@std@@@std@@QEAA@XZ + ??0?$_Vb_val@V?$allocator@_N@std@@@std@@QEAA@_KAEB_NAEBV?$allocator@_N@1@@Z + ??0?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@QEAA@PEAHPEBU_Container_base0@1@@Z + ??0?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@std@@QEAA@PEAIPEBU_Container_base0@1@@Z + ??0?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@QEAA@PEAMPEBU_Container_base0@1@@Z + ??0?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@QEAA@PEANPEBU_Container_base0@1@@Z + ??0?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@QEAA@PEAU?$pair@NH@1@PEBU_Container_base0@1@@Z + ??0?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@std@@QEAA@PEAUllama_sp_bigram@@PEBU_Container_base0@1@@Z + ??0?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@std@@QEAA@PEAUllama_token_data@@PEBU_Container_base0@1@@Z + ??0?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@std@@QEAA@PEA_KPEBU_Container_base0@1@@Z + ??0?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@QEAA@PEAHPEBU_Container_base0@1@@Z + ??0?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@std@@QEAA@PEAIPEBU_Container_base0@1@@Z + ??0?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@QEAA@PEAMPEBU_Container_base0@1@@Z + ??0?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@QEAA@PEANPEBU_Container_base0@1@@Z + ??0?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@QEAA@PEAU?$pair@NH@1@PEBU_Container_base0@1@@Z + ??0?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@std@@QEAA@PEAUllama_sp_bigram@@PEBU_Container_base0@1@@Z + ??0?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@std@@QEAA@PEAUllama_token_data@@PEBU_Container_base0@1@@Z + ??0?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@std@@QEAA@PEA_KPEBU_Container_base0@1@@Z + ??0?$_Vector_val@U?$_Simple_types@D@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@E@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@H@std@@@std@@QEAA@PEAH00@Z + ??0?$_Vector_val@U?$_Simple_types@H@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@I@std@@@std@@QEAA@PEAI00@Z + ??0?$_Vector_val@U?$_Simple_types@I@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@M@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@N@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@PEAUggml_tensor@@@std@@@std@@QEAA@PEAPEAUggml_tensor@@00@Z + ??0?$_Vector_val@U?$_Simple_types@PEAUggml_tensor@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@U_Loop_vals_t@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Udollyv2_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ufalcon_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ugpt2_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ugpt_neox_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ugptj_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ullama_buffer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ullama_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ullama_load_tensor@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ullama_load_tensor_shard@@@std@@@std@@QEAA@PEAUllama_load_tensor_shard@@00@Z + ??0?$_Vector_val@U?$_Simple_types@Ullama_load_tensor_shard@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ullama_sp_symbol@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Umpt_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ureplit_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Ustarcoder_layer@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Utoken_score@llama_vocab@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$sub_match@PEBD@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@Vthread@std@@@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@_J@std@@@std@@QEAA@XZ + ??0?$_Vector_val@U?$_Simple_types@_K@std@@@std@@QEAA@XZ + ??0?$_Wrap_istream@DU?$char_traits@D@std@@I@std@@QEAA@AEAV?$basic_istream@DU?$char_traits@D@std@@@1@@Z + ??0?$allocator@D@std@@QEAA@XZ + ??0?$allocator@E@std@@QEAA@XZ + ??0?$allocator@H@std@@QEAA@XZ + ??0?$allocator@I@std@@QEAA@XZ + ??0?$allocator@M@std@@QEAA@XZ + ??0?$allocator@N@std@@QEAA@XZ + ??0?$allocator@PEAUggml_tensor@@@std@@QEAA@XZ + ??0?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@QEAA@XZ + ??0?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@QEAA@XZ + ??0?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@QEAA@XZ + ??0?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@QEAA@XZ + ??0?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@QEAA@XZ + ??0?$allocator@U?$pair@$$CBHH@std@@@std@@QEAA@XZ + ??0?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@QEAA@XZ + ??0?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@QEAA@XZ + ??0?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@QEAA@XZ + ??0?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@QEAA@XZ + ??0?$allocator@U?$pair@NH@std@@@std@@QEAA@XZ + ??0?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@QEAA@XZ + ??0?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@QEAA@XZ + ??0?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$allocator@U_Loop_vals_t@std@@@std@@QEAA@XZ + ??0?$allocator@Udollyv2_layer@@@std@@QEAA@XZ + ??0?$allocator@Ufalcon_layer@@@std@@QEAA@XZ + ??0?$allocator@Ugpt2_layer@@@std@@QEAA@XZ + ??0?$allocator@Ugpt_neox_layer@@@std@@QEAA@XZ + ??0?$allocator@Ugptj_layer@@@std@@QEAA@XZ + ??0?$allocator@Ullama_buffer@@@std@@QEAA@XZ + ??0?$allocator@Ullama_layer@@@std@@QEAA@XZ + ??0?$allocator@Ullama_load_tensor@@@std@@QEAA@XZ + ??0?$allocator@Ullama_load_tensor_shard@@@std@@QEAA@XZ + ??0?$allocator@Ullama_sp_bigram@@@std@@QEAA@XZ + ??0?$allocator@Ullama_sp_symbol@@@std@@QEAA@XZ + ??0?$allocator@Ullama_token_data@@@std@@QEAA@XZ + ??0?$allocator@Umpt_layer@@@std@@QEAA@XZ + ??0?$allocator@Ureplit_layer@@@std@@QEAA@XZ + ??0?$allocator@Ustarcoder_layer@@@std@@QEAA@XZ + ??0?$allocator@Utoken_score@llama_vocab@@@std@@QEAA@XZ + ??0?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@XZ + ??0?$allocator@V?$sub_match@PEBD@std@@@std@@QEAA@XZ + ??0?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@QEAA@XZ + ??0?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@QEAA@XZ + ??0?$allocator@Vthread@std@@@std@@QEAA@XZ + ??0?$allocator@_J@std@@QEAA@XZ + ??0?$allocator@_K@std@@QEAA@XZ + ??0?$allocator@_N@std@@QEAA@XZ + ??0?$allocator@_W@std@@QEAA@XZ + ??0?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@AEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??0?$basic_filebuf@DU?$char_traits@D@std@@@std@@QEAA@XZ + ??0?$basic_ifstream@DU?$char_traits@D@std@@@std@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@HH@Z + ??0?$basic_ifstream@DU?$char_traits@D@std@@@std@@QEAA@PEBDHH@Z + ??0?$basic_regex@DV?$regex_traits@D@std@@@std@@QEAA@PEBDW4syntax_option_type@regex_constants@1@@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@$$QEAV01@@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@AEBV01@@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@AEBV01@_K1AEBV?$allocator@D@1@@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@AEBV?$allocator@D@1@@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@QEBD@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@QEBD_K@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@U_String_constructor_concat_tag@1@AEAV01@1@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@U_String_constructor_concat_tag@1@AEBV01@QEBD_K23@Z + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@XZ + ??0?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@_KD@Z + ??0?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEAA@$$QEAV01@@Z + ??0?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEAA@AEBV01@@Z + ??0?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEAA@XZ + ??0?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@H@Z + ??0?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@XZ + ??0?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@QEAA@_K@Z + ??0?$collate@D@std@@QEAA@AEBV_Locinfo@1@_K@Z + ??0?$fpos@U_Mbstatet@@@std@@QEAA@U_Mbstatet@@_J@Z + ??0?$fpos@U_Mbstatet@@@std@@QEAA@_J@Z + ??0?$initializer_list@H@std@@QEAA@PEBH0@Z + ??0?$initializer_list@I@std@@QEAA@PEBI0@Z + ??0?$initializer_list@PEBD@std@@QEAA@PEBQEBD0@Z + ??0?$initializer_list@U?$pair@$$CBW4e_model@@_K@std@@@std@@QEAA@PEBU?$pair@$$CBW4e_model@@_K@1@0@Z + ??0?$initializer_list@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@QEAA@PEBU?$pair@$$CBW4falcon_e_model@@_K@1@0@Z + ??0?$list@HV?$allocator@H@std@@@std@@QEAA@AEBV?$allocator@H@1@@Z + ??0?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBHH@std@@@1@@Z + ??0?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@1@@Z + ??0?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@1@@Z + ??0?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@1@@Z + ??0?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAA@AEBV?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@1@@Z + ??0?$map@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@@std@@QEAA@XZ + ??0?$map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAA@XZ + ??0?$map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA@XZ + ??0?$map@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@@std@@QEAA@V?$initializer_list@U?$pair@$$CBW4e_model@@_K@std@@@1@@Z + ??0?$map@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@@std@@QEAA@V?$initializer_list@U?$pair@$$CBW4falcon_e_model@@_K@std@@@1@@Z + ??0?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAA@XZ + ??0?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAA@XZ + ??0?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@QEAA@III@Z + ??0?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@std@@QEAA@I@Z + ??0?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@std@@QEAA@XZ + ??0?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@$$QEAU01@@Z + ??0?$priority_queue@Ullama_sp_bigram@@V?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@Ucomparator@1@@std@@QEAA@XZ + ??0?$regex_traits@D@std@@QEAA@XZ + ??0?$sub_match@PEBD@std@@QEAA@XZ + ??0?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAA@XZ + ??0?$tuple@$$QEAH@std@@QEAA@$$QEAV01@@Z + ??0?$tuple@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@$$QEAV01@@Z + ??0?$tuple@$$V@std@@QEAA@AEBV01@@Z + ??0?$tuple@AEBH@std@@QEAA@$$QEAV01@@Z + ??0?$tuple@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@$$QEAV01@@Z + ??0?$unique_lock@Vmutex@std@@@std@@QEAA@AEAVmutex@1@@Z + ??0?$unordered_map@HHU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAA@XZ + ??0?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAA@XZ + ??0?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA@XZ + ??0?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEAA@XZ + ??0?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAA@XZ + ??0?$unordered_set@HU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@H@2@@std@@QEAA@$$QEAV01@@Z + ??0?$unordered_set@HU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@H@2@@std@@QEAA@XZ + ??0?$vector@DV?$allocator@D@std@@@std@@QEAA@_KAEBV?$allocator@D@1@@Z + ??0?$vector@EV?$allocator@E@std@@@std@@QEAA@_KAEBV?$allocator@E@1@@Z + ??0?$vector@HV?$allocator@H@std@@@std@@QEAA@$$QEAV01@@Z + ??0?$vector@HV?$allocator@H@std@@@std@@QEAA@AEBV01@@Z + ??0?$vector@HV?$allocator@H@std@@@std@@QEAA@V?$initializer_list@H@1@AEBV?$allocator@H@1@@Z + ??0?$vector@HV?$allocator@H@std@@@std@@QEAA@XZ + ??0?$vector@HV?$allocator@H@std@@@std@@QEAA@_KAEBHAEBV?$allocator@H@1@@Z + ??0?$vector@IV?$allocator@I@std@@@std@@QEAA@$$QEAV01@@Z + ??0?$vector@IV?$allocator@I@std@@@std@@QEAA@AEBV01@@Z + ??0?$vector@IV?$allocator@I@std@@@std@@QEAA@V?$initializer_list@I@1@AEBV?$allocator@I@1@@Z + ??0?$vector@IV?$allocator@I@std@@@std@@QEAA@XZ + ??0?$vector@IV?$allocator@I@std@@@std@@QEAA@_KAEBIAEBV?$allocator@I@1@@Z + ??0?$vector@MV?$allocator@M@std@@@std@@QEAA@XZ + ??0?$vector@MV?$allocator@M@std@@@std@@QEAA@_KAEBMAEBV?$allocator@M@1@@Z + ??0?$vector@MV?$allocator@M@std@@@std@@QEAA@_KAEBV?$allocator@M@1@@Z + ??0?$vector@NV?$allocator@N@std@@@std@@QEAA@XZ + ??0?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAA@$$QEAV01@@Z + ??0?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAA@XZ + ??0?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAA@XZ + ??0?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA@XZ + ??0?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEAA@AEBV01@@Z + ??0?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEAA@XZ + ??0?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEAA@AEBV01@@Z + ??0?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEAA@XZ + ??0?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@QEAA@XZ + ??0?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@QEAA@_KAEBV?$allocator@Ullama_buffer@@@1@@Z + ??0?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAA@XZ + ??0?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAA@$$QEAV01@@Z + ??0?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAA@XZ + ??0?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAA@XZ + ??0?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAA@XZ + ??0?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAA@XZ + ??0?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAA@XZ + ??0?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAA@XZ + ??0?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAA@XZ + ??0?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@QEAA@XZ + ??0?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAA@XZ + ??0?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@QEAA@XZ + ??0?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@QEAA@XZ + ??0?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAA@XZ + ??0?$vector@_JV?$allocator@_J@std@@@std@@QEAA@XZ + ??0?$vector@_JV?$allocator@_J@std@@@std@@QEAA@_KAEB_JAEBV?$allocator@_J@1@@Z + ??0?$vector@_KV?$allocator@_K@std@@@std@@QEAA@_KAEBV?$allocator@_K@1@@Z + ??0?$vector@_NV?$allocator@_N@std@@@std@@QEAA@AEBV01@@Z + ??0?$vector@_NV?$allocator@_N@std@@@std@@QEAA@XZ + ??0?$vector@_NV?$allocator@_N@std@@@std@@QEAA@_KAEBV?$allocator@_N@1@@Z + ??0?$wstring_convert@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@_WV?$allocator@_W@2@V?$allocator@D@2@@std@@QEAA@XZ + ??0LLM@@QEAA@XZ + ??0RingBuffer@@QEAA@XZ + ??0_Bitmap@std@@QEAA@XZ + ??0_Bxty@?$_String_val@U?$_Simple_types@D@std@@@std@@QEAA@XZ + ??0_Bxty@?$_String_val@U?$_Simple_types@_W@std@@@std@@QEAA@XZ + ??0_Clear_guard@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA@QEAV12@@Z + ??0_Clear_guard@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA@QEAV12@@Z + ??0_Clear_guard@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA@QEAV12@@Z + ??0_Clear_guard@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAA@QEAV12@@Z + ??0_Clear_guard@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA@QEAV12@@Z + ??0_Clear_guard@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA@QEAV12@@Z + ??0_Fake_proxy_ptr_impl@std@@QEAA@AEBU_Fake_allocator@1@AEBU_Container_base0@1@@Z + ??0_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAA@XZ + ??0_Mutex_base@std@@QEAA@H@Z + ??0_Node_assert@std@@QEAA@W4_Node_type@1@W4_Node_flags@1@@Z + ??0_Node_back@std@@QEAA@I@Z + ??0_Node_base@std@@QEAA@W4_Node_type@1@W4_Node_flags@1@@Z + ??0_Node_capture@std@@QEAA@I@Z + ??0_Node_end_group@std@@QEAA@W4_Node_type@1@W4_Node_flags@1@PEAV_Node_base@1@@Z + ??0_Node_end_rep@std@@QEAA@XZ + ??0_Node_endif@std@@QEAA@XZ + ??0_Node_if@std@@QEAA@PEAV_Node_base@1@@Z + ??0_Node_rep@std@@QEAA@_NHHPEAV_Node_end_rep@1@I@Z + ??0_Range_eraser@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA@AEAV?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@2@QEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@@Z + ??0_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA@AEAV?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@2@QEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@@Z + ??0_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA@AEAV?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@2@QEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@@Z + ??0_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAA@AEAV?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@2@QEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@2@@Z + ??0_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA@AEAV?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@2@QEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@@Z + ??0_Range_eraser@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA@AEAV?$list@HV?$allocator@H@std@@@2@QEAU?$_List_node@HPEAX@2@@Z + ??0_Root_node@std@@QEAA@XZ + ??0_Sentry_base@?$basic_ostream@DU?$char_traits@D@std@@@std@@QEAA@AEAV12@@Z + ??0_System_error@std@@IEAA@Verror_code@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??0_System_error@std@@QEAA@AEBV01@@Z + ??0bad_alloc@std@@AEAA@QEBD@Z + ??0bad_alloc@std@@QEAA@AEBV01@@Z + ??0bad_array_new_length@std@@QEAA@AEBV01@@Z + ??0bad_array_new_length@std@@QEAA@XZ + ??0bad_cast@std@@QEAA@AEBV01@@Z + ??0bad_cast@std@@QEAA@XZ + ??0dollyv2_hparams@@QEAA@XZ + ??0dollyv2_llm@@QEAA@XZ + ??0dollyv2_model@@QEAA@XZ + ??0error_code@std@@QEAA@HAEBVerror_category@1@@Z + ??0error_condition@std@@QEAA@HAEBVerror_category@1@@Z + ??0exception@std@@QEAA@AEBV01@@Z + ??0exception@std@@QEAA@QEBD@Z + ??0exception@std@@QEAA@QEBDH@Z + ??0falcon_context@@QEAA@XZ + ??0falcon_file_loader@@QEAA@PEBD_KAEAUllama_load_tensors_map@@@Z + ??0falcon_file_saver@@QEAA@PEBDPEAUfalcon_file_loader@@W4llama_ftype@@@Z + ??0falcon_hparams@@QEAA@XZ + ??0falcon_kv_cache@@QEAA@XZ + ??0falcon_llm@@QEAA@XZ + ??0falcon_model@@QEAA@XZ + ??0falcon_model_loader@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_N1@Z + ??0gpt2_hparams@@QEAA@XZ + ??0gpt2_llm@@QEAA@XZ + ??0gpt2_model@@QEAA@XZ + ??0gpt_neox_hparams@@QEAA@XZ + ??0gpt_neox_llm@@QEAA@XZ + ??0gpt_neox_model@@QEAA@XZ + ??0gpt_vocab@@QEAA@XZ + ??0gptj_hparams@@QEAA@XZ + ??0gptj_llm@@QEAA@XZ + ??0gptj_model@@QEAA@XZ + ??0llama_buffer@@QEAA@XZ + ??0llama_context@@QEAA@XZ + ??0llama_file@@QEAA@PEBD0@Z + ??0llama_file_loader@@QEAA@PEBD_KAEAUllama_load_tensors_map@@@Z + ??0llama_file_saver@@QEAA@PEBDPEAUllama_file_loader@@W4llama_ftype@@@Z + ??0llama_hparams@@QEAA@XZ + ??0llama_kv_cache@@QEAA@XZ + ??0llama_llm@@QEAA@XZ + ??0llama_load_tensor@@QEAA@$$QEAU0@@Z + ??0llama_load_tensor@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z + ??0llama_load_tensor_shard@@QEAA@$$QEAU0@@Z + ??0llama_load_tensor_shard@@QEAA@AEBU0@@Z + ??0llama_load_tensor_shard@@QEAA@XZ + ??0llama_load_tensors_map@@QEAA@XZ + ??0llama_mlock@@QEAA@XZ + ??0llama_mmap@@QEAA@PEAUllama_file@@_N@Z + ??0llama_model@@QEAA@XZ + ??0llama_model_loader@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_N1@Z + ??0llama_tokenizer@@QEAA@AEBUllama_vocab@@@Z + ??0llama_vocab@@QEAA@XZ + ??0locale@std@@QEAA@XZ + ??0mpt_hparams@@QEAA@XZ + ??0mpt_llm@@QEAA@XZ + ??0mpt_model@@QEAA@XZ + ??0mutex@std@@QEAA@XZ + ??0range_error@std@@QEAA@AEBV01@@Z + ??0range_error@std@@QEAA@PEBD@Z + ??0replit_hparams@@QEAA@XZ + ??0replit_llm@@QEAA@XZ + ??0replit_model@@QEAA@XZ + ??0replit_tokenizer@@QEAA@XZ + ??0runtime_error@std@@QEAA@AEBV01@@Z + ??0runtime_error@std@@QEAA@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??0runtime_error@std@@QEAA@PEBD@Z + ??0sentry@?$basic_ostream@DU?$char_traits@D@std@@@std@@QEAA@AEAV12@@Z + ??0starcoder_hparams@@QEAA@XZ + ??0starcoder_llm@@QEAA@XZ + ??0starcoder_model@@QEAA@XZ + ??0system_error@std@@QEAA@AEBV01@@Z + ??0system_error@std@@QEAA@Verror_code@1@@Z + ??0thread@std@@QEAA@$$QEAV01@@Z + ??0thread@std@@QEAA@XZ + ??0token_score@llama_vocab@@QEAA@$$QEAU01@@Z + ??0token_score@llama_vocab@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Alloc_temporary2@V?$allocator@H@std@@@std@@QEAA@XZ + ??1?$_Bt_state_t@PEBD@std@@QEAA@XZ + ??1?$_Bt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Buf@D@std@@QEAA@XZ + ??1?$_Compressed_pair@V?$allocator@D@std@@V?$_String_val@U?$_Simple_types@D@std@@@2@$00@std@@QEAA@XZ + ??1?$_Compressed_pair@V?$allocator@_W@std@@V?$_String_val@U?$_Simple_types@_W@std@@@2@$00@std@@QEAA@XZ + ??1?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Hash_vec@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEAA@XZ + ??1?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_List_node_emplace_op2@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_List_node_emplace_op2@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@QEAA@XZ + ??1?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@QEAA@XZ + ??1?$_Node_class@DV?$regex_traits@D@std@@@std@@UEAA@XZ + ??1?$_Node_str@D@std@@UEAA@XZ + ??1?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@QEAA@XZ + ??1?$_Regex_traits@D@std@@QEAA@XZ + ??1?$_Sequence@D@std@@QEAA@XZ + ??1?$_String_val@U?$_Simple_types@D@std@@@std@@QEAA@XZ + ??1?$_String_val@U?$_Simple_types@_W@std@@@std@@QEAA@XZ + ??1?$_Tgt_state_t@PEBD@std@@QEAA@XZ + ??1?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@DV?$allocator@D@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@EV?$allocator@E@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@HV?$allocator@H@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@IV?$allocator@I@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@MV?$allocator@M@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@NV?$allocator@N@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@_JV?$allocator@_J@std@@@std@@@std@@QEAA@XZ + ??1?$_Tidy_guard@V?$vector@_KV?$allocator@_K@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@QEAA@XZ + ??1?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Tree_temp_node_alloc@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@D@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@E@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@H@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@I@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@M@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@N@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@U?$pair@NH@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ugptj_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ullama_buffer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ullama_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ullama_token_data@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Umpt_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ureplit_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@Vthread@std@@@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@_J@std@@@std@@QEAA@XZ + ??1?$_Uninitialized_backout_al@V?$allocator@_K@std@@@std@@QEAA@XZ + ??1?$_Vb_val@V?$allocator@_N@std@@@std@@QEAA@XZ + ??1?$basic_filebuf@DU?$char_traits@D@std@@@std@@UEAA@XZ + ??1?$basic_ifstream@DU?$char_traits@D@std@@@std@@UEAA@XZ + ??1?$basic_regex@DV?$regex_traits@D@std@@@std@@QEAA@XZ + ??1?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA@XZ + ??1?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEAA@XZ + ??1?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@UEAA@XZ + ??1?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@UEAA@XZ + ??1?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@UEAA@XZ + ??1?$collate@D@std@@MEAA@XZ + ??1?$discrete_distribution@H@std@@QEAA@XZ + ??1?$list@HV?$allocator@H@std@@@std@@QEAA@XZ + ??1?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAA@XZ + ??1?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAA@XZ + ??1?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA@XZ + ??1?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEAA@XZ + ??1?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAA@XZ + ??1?$map@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@@std@@QEAA@XZ + ??1?$map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAA@XZ + ??1?$map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA@XZ + ??1?$map@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@@std@@QEAA@XZ + ??1?$map@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@@std@@QEAA@XZ + ??1?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAA@XZ + ??1?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAA@XZ + ??1?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA@XZ + ??1?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@QEAA@XZ + ??1?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@XZ + ??1?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@QEAA@XZ + ??1?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@QEAA@XZ + ??1?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@QEAA@XZ + ??1?$pair@V?$vector@HV?$allocator@H@std@@@std@@M@std@@QEAA@XZ + ??1?$priority_queue@Ullama_sp_bigram@@V?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@Ucomparator@1@@std@@QEAA@XZ + ??1?$regex_traits@D@std@@QEAA@XZ + ??1?$unique_lock@Vmutex@std@@@std@@QEAA@XZ + ??1?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@QEAA@XZ + ??1?$unique_ptr@Ufalcon_model_loader@@U?$default_delete@Ufalcon_model_loader@@@std@@@std@@QEAA@XZ + ??1?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@QEAA@XZ + ??1?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@std@@QEAA@XZ + ??1?$unique_ptr@Ullama_model_loader@@U?$default_delete@Ullama_model_loader@@@std@@@std@@QEAA@XZ + ??1?$unique_ptr@V_Facet_base@std@@U?$default_delete@V_Facet_base@std@@@2@@std@@QEAA@XZ + ??1?$unique_ptr@V_Node_assert@std@@U?$default_delete@V_Node_assert@std@@@2@@std@@QEAA@XZ + ??1?$unordered_map@HHU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAA@XZ + ??1?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAA@XZ + ??1?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA@XZ + ??1?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEAA@XZ + ??1?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAA@XZ + ??1?$unordered_set@HU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@H@2@@std@@QEAA@XZ + ??1?$vector@DV?$allocator@D@std@@@std@@QEAA@XZ + ??1?$vector@EV?$allocator@E@std@@@std@@QEAA@XZ + ??1?$vector@HV?$allocator@H@std@@@std@@QEAA@XZ + ??1?$vector@IV?$allocator@I@std@@@std@@QEAA@XZ + ??1?$vector@MV?$allocator@M@std@@@std@@QEAA@XZ + ??1?$vector@NV?$allocator@N@std@@@std@@QEAA@XZ + ??1?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAA@XZ + ??1?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAA@XZ + ??1?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA@XZ + ??1?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEAA@XZ + ??1?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEAA@XZ + ??1?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@QEAA@XZ + ??1?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAA@XZ + ??1?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAA@XZ + ??1?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAA@XZ + ??1?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAA@XZ + ??1?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAA@XZ + ??1?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAA@XZ + ??1?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAA@XZ + ??1?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAA@XZ + ??1?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@QEAA@XZ + ??1?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAA@XZ + ??1?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@QEAA@XZ + ??1?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@QEAA@XZ + ??1?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAA@XZ + ??1?$vector@_JV?$allocator@_J@std@@@std@@QEAA@XZ + ??1?$vector@_KV?$allocator@_K@std@@@std@@QEAA@XZ + ??1?$vector@_NV?$allocator@_N@std@@@std@@QEAA@XZ + ??1?$wstring_convert@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@_WV?$allocator@_W@2@V?$allocator@D@2@@std@@UEAA@XZ + ??1LLM@@UEAA@XZ + ??1RingBuffer@@QEAA@XZ + ??1_Bxty@?$_String_val@U?$_Simple_types@D@std@@@std@@QEAA@XZ + ??1_Bxty@?$_String_val@U?$_Simple_types@_W@std@@@std@@QEAA@XZ + ??1_Clear_guard@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Clear_guard@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Clear_guard@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Clear_guard@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Clear_guard@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Clear_guard@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA@XZ + ??1_Generic_error_category@std@@UEAA@XZ + ??1_Mutex_base@std@@QEAA@XZ + ??1_Node_assert@std@@UEAA@XZ + ??1_Node_back@std@@UEAA@XZ + ??1_Node_base@std@@UEAA@XZ + ??1_Node_capture@std@@UEAA@XZ + ??1_Node_end_group@std@@UEAA@XZ + ??1_Node_end_rep@std@@UEAA@XZ + ??1_Node_endif@std@@UEAA@XZ + ??1_Node_if@std@@UEAA@XZ + ??1_Node_rep@std@@UEAA@XZ + ??1_Range_eraser@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA@XZ + ??1_Range_eraser@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA@XZ + ??1_Root_node@std@@UEAA@XZ + ??1_Sentry_base@?$basic_ostream@DU?$char_traits@D@std@@@std@@QEAA@XZ + ??1_System_error@std@@UEAA@XZ + ??1bad_alloc@std@@UEAA@XZ + ??1bad_array_new_length@std@@UEAA@XZ + ??1bad_cast@std@@UEAA@XZ + ??1dollyv2_llm@@UEAA@XZ + ??1dollyv2_model@@QEAA@XZ + ??1error_category@std@@UEAA@XZ + ??1exception@std@@UEAA@XZ + ??1falcon_context@@QEAA@XZ + ??1falcon_file_loader@@QEAA@XZ + ??1falcon_file_saver@@QEAA@XZ + ??1falcon_kv_cache@@QEAA@XZ + ??1falcon_llm@@UEAA@XZ + ??1falcon_model@@QEAA@XZ + ??1falcon_model_loader@@QEAA@XZ + ??1gpt2_llm@@UEAA@XZ + ??1gpt2_model@@QEAA@XZ + ??1gpt_neox_llm@@UEAA@XZ + ??1gpt_neox_model@@QEAA@XZ + ??1gpt_vocab@@QEAA@XZ + ??1gptj_llm@@UEAA@XZ + ??1gptj_model@@QEAA@XZ + ??1llama_buffer@@QEAA@XZ + ??1llama_context@@QEAA@XZ + ??1llama_file@@QEAA@XZ + ??1llama_file_loader@@QEAA@XZ + ??1llama_file_saver@@QEAA@XZ + ??1llama_kv_cache@@QEAA@XZ + ??1llama_llm@@UEAA@XZ + ??1llama_load_tensor@@QEAA@XZ + ??1llama_load_tensor_shard@@QEAA@XZ + ??1llama_load_tensors_map@@QEAA@XZ + ??1llama_mlock@@QEAA@XZ + ??1llama_mmap@@QEAA@XZ + ??1llama_model@@QEAA@XZ + ??1llama_model_loader@@QEAA@XZ + ??1llama_tokenizer@@QEAA@XZ + ??1llama_vocab@@QEAA@XZ + ??1locale@std@@QEAA@XZ + ??1mpt_llm@@UEAA@XZ + ??1mpt_model@@QEAA@XZ + ??1mutex@std@@QEAA@XZ + ??1param_type@?$discrete_distribution@H@std@@QEAA@XZ + ??1range_error@std@@UEAA@XZ + ??1replit_llm@@UEAA@XZ + ??1replit_model@@QEAA@XZ + ??1replit_tokenizer@@QEAA@XZ + ??1runtime_error@std@@UEAA@XZ + ??1sentry@?$basic_ostream@DU?$char_traits@D@std@@@std@@QEAA@XZ + ??1starcoder_llm@@UEAA@XZ + ??1starcoder_model@@QEAA@XZ + ??1system_error@std@@UEAA@XZ + ??1thread@std@@QEAA@XZ + ??1token_score@llama_vocab@@QEAA@XZ + ??2@YAPEAX_KPEAX@Z + ??4?$_Bt_state_t@PEBD@std@@QEAAAEAV01@AEBV01@@Z + ??4?$_Bt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAAAEAV01@AEBV01@@Z + ??4?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAAAEAV01@$$QEAV01@@Z + ??4?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAAAEAV01@$$QEAV01@@Z + ??4?$_Tgt_state_t@PEBD@std@@QEAAAEAV01@AEBV01@@Z + ??4?$_Tgt_state_t@PEBD@std@@QEAAXAEBV?$_Bt_state_t@PEBD@1@@Z + ??4?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAAAEAV01@AEBV01@@Z + ??4?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEAAXAEBV?$_Bt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@@Z + ??4?$_Vb_reference@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAAEAV01@AEBV01@@Z + ??4?$_Vb_reference@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAAEAV01@_N@Z + ??4?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAAAEAV01@$$QEAD@Z + ??4?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAAAEAV01@AEBD@Z + ??4?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV01@$$QEAV01@@Z + ??4?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV01@AEBV01@@Z + ??4?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV01@QEBD@Z + ??4?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAAAEAV01@$$QEAV01@@Z + ??4?$unordered_set@HU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@H@2@@std@@QEAAAEAV01@$$QEAV01@@Z + ??4?$vector@IV?$allocator@I@std@@@std@@QEAAAEAV01@AEBV01@@Z + ??4?$vector@IV?$allocator@I@std@@@std@@QEAAAEAV01@V?$initializer_list@I@1@@Z + ??4?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEAAAEAV01@AEBV01@@Z + ??4?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEAAAEAV01@AEBV01@@Z + ??4?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAAAEAV01@$$QEAV01@@Z + ??4?$vector@_NV?$allocator@_N@std@@@std@@QEAAAEAV01@AEBV01@@Z + ??4llama_vocab@@QEAAAEAU0@$$QEAU0@@Z + ??4locale@std@@QEAAAEBV01@AEBV01@@Z + ??4thread@std@@QEAAAEAV01@$$QEAV01@@Z + ??8?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??8?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??8?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??8?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??8?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??8?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??8?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??8error_category@std@@QEBA_NAEBV01@@Z + ??8std@@YA_NAEBVerror_condition@0@0@Z + ??9?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??9?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??9?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??9?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??9?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??9?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??9?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??9?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBA_NAEBV01@@Z + ??9?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??9?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??9?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEBA_NAEBV01@@Z + ??9falcon_hparams@@QEBA_NAEBU0@@Z + ??9llama_hparams@@QEBA_NAEBU0@@Z + ??A?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAD_K@Z + ??A?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBAAEBD_K@Z + ??A?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEAAAEA_W_K@Z + ??A?$map@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@@std@@QEAAAEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@$$QEAH@Z + ??A?$map@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@@std@@QEAAAEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@AEBH@Z + ??A?$map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAAAEAHAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??A?$map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAAAEAPEAUggml_tensor@@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??A?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEBAAEBV?$sub_match@PEBD@1@_K@Z + ??A?$unordered_map@HHU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAAAEAHAEBH@Z + ??A?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAAAEAHAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??A?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAAAEAPEAUggml_tensor@@$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??A?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAAAEAPEAUggml_tensor@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??A?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEAAAEAU?$pair@_KM@1@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??A?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAAAEA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??A?$vector@HV?$allocator@H@std@@@std@@QEAAAEAH_K@Z + ??A?$vector@IV?$allocator@I@std@@@std@@QEAAAEAI_K@Z + ??A?$vector@IV?$allocator@I@std@@@std@@QEBAAEBI_K@Z + ??A?$vector@MV?$allocator@M@std@@@std@@QEAAAEAM_K@Z + ??A?$vector@NV?$allocator@N@std@@@std@@QEAAAEAN_K@Z + ??A?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAAAEAU?$pair@NH@1@_K@Z + ??A?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEAAAEAU_Grp_t@?$_Tgt_state_t@PEBD@1@_K@Z + ??A?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEAAAEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@_K@Z + ??A?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@QEAAAEAU_Loop_vals_t@1@_K@Z + ??A?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAAAEAUdollyv2_layer@@_K@Z + ??A?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@QEBAAEBUdollyv2_layer@@_K@Z + ??A?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAAAEAUfalcon_layer@@_K@Z + ??A?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@QEBAAEBUfalcon_layer@@_K@Z + ??A?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAAAEAUgpt2_layer@@_K@Z + ??A?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@QEBAAEBUgpt2_layer@@_K@Z + ??A?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAAAEAUgpt_neox_layer@@_K@Z + ??A?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEBAAEBUgpt_neox_layer@@_K@Z + ??A?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@QEAAAEAUgptj_layer@@_K@Z + ??A?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@QEBAAEBUgptj_layer@@_K@Z + ??A?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@QEAAAEAUllama_layer@@_K@Z + ??A?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@QEBAAEBUllama_layer@@_K@Z + ??A?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAAAEAUllama_load_tensor@@_K@Z + ??A?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAAAEAUllama_sp_symbol@@_K@Z + ??A?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@QEAAAEAUmpt_layer@@_K@Z + ??A?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@QEBAAEBUmpt_layer@@_K@Z + ??A?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@QEAAAEAUreplit_layer@@_K@Z + ??A?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@QEBAAEBUreplit_layer@@_K@Z + ??A?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAAAEAUstarcoder_layer@@_K@Z + ??A?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEBAAEBUstarcoder_layer@@_K@Z + ??A?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAAAEAUtoken_score@llama_vocab@@_K@Z + ??A?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEBAAEBUtoken_score@llama_vocab@@_K@Z + ??A?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@QEAAAEAV?$sub_match@PEBD@1@_K@Z + ??A?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@QEBAAEBV?$sub_match@PEBD@1@_K@Z + ??A?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAAAEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@1@_K@Z + ??A?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAAAEAVthread@1@_K@Z + ??A?$vector@_JV?$allocator@_J@std@@@std@@QEAAAEA_J_K@Z + ??A?$vector@_KV?$allocator@_K@std@@@std@@QEAAAEA_K_K@Z + ??A?$vector@_NV?$allocator@_N@std@@@std@@QEAA?AV?$_Vb_reference@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@1@_K@Z + ??B?$_Vb_reference@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEBA_NXZ + ??B?$fpos@U_Mbstatet@@@std@@QEBA_JXZ + ??B?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@XZ + ??B?$unique_ptr@Ufalcon_model_loader@@U?$default_delete@Ufalcon_model_loader@@@std@@@std@@QEBA_NXZ + ??B?$unique_ptr@Ullama_model_loader@@U?$default_delete@Ullama_model_loader@@@std@@@std@@QEBA_NXZ + ??Bsentry@?$basic_ostream@DU?$char_traits@D@std@@@std@@QEBA_NXZ + ??C?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEBAPEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@XZ + ??C?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEBAPEAU?$pair@$$CBHH@1@XZ + ??C?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEBAPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@XZ + ??C?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@@std@@QEBAPEBU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@XZ + ??C?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEBAPEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@XZ + ??C?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@QEBAPEAUfalcon_file_loader@@XZ + ??C?$unique_ptr@Ufalcon_model_loader@@U?$default_delete@Ufalcon_model_loader@@@std@@@std@@QEBAPEAUfalcon_model_loader@@XZ + ??C?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@QEBAPEAUllama_file_loader@@XZ + ??C?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@std@@QEBAPEAUllama_mmap@@XZ + ??C?$unique_ptr@Ullama_model_loader@@U?$default_delete@Ullama_model_loader@@@std@@@std@@QEBAPEAUllama_model_loader@@XZ + ??D?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEBAAEBU?$pair@$$CBHH@1@XZ + ??D?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEBAAEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@XZ + ??D?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEBAAEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@XZ + ??D?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEBAAEAU?$pair@$$CBHH@1@XZ + ??D?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEBAAEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@XZ + ??D?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@QEBAAEBHXZ + ??D?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBAAEBU?$pair@$$CBHH@1@XZ + ??D?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBAAEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@XZ + ??D?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBAAEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@XZ + ??D?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBAAEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@1@XZ + ??D?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEBAAEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@XZ + ??D?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEBAAEAU?$pair@$$CBHH@1@XZ + ??D?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEBAAEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@XZ + ??D?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEBAAEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@1@XZ + ??D?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@QEBAAEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@1@XZ + ??D?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEBAAEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@1@XZ + ??D?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEBAAEBDXZ + ??D?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@@std@@QEBAAEBU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@1@XZ + ??D?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEBAAEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@1@XZ + ??D?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEBA?AV?$_Vb_reference@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@1@XZ + ??D?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@QEAAAEAV01@XZ + ??E?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@QEAAAEAV01@XZ + ??E?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAA?AV01@H@Z + ??E?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAAAEAV01@XZ + ??E?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??E?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??E?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAAEAV01@XZ + ??E?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAAEAV01@XZ + ??E?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAA?AV01@H@Z + ??E?$back_insert_iterator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAAAEAV01@XZ + ??F?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??F?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??F?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??F?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??F?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??F?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??F?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAAAEAV01@XZ + ??F?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??F?$_Tree_unchecked_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@U_Iterator_base0@2@@std@@QEAAAEAV01@XZ + ??F?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAAEAV01@XZ + ??F?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAAEAV01@XZ + ??F?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@QEAAAEAV01@XZ + ??G?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEBA_JAEBV01@@Z + ??G?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEBA_JAEBV01@@Z + ??G?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@QEBA_JAEBV01@@Z + ??H?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEBA?AV01@_J@Z + ??H?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEBA?AV01@_J@Z + ??H?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@QEBA?AV01@_J@Z + ??H?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@std@@QEBA?AV01@_J@Z + ??H?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@QEBA?AV01@_J@Z + ??Iregex_constants@std@@YA?AW4match_flag_type@01@W4201@0@Z + ??Iregex_constants@std@@YA?AW4syntax_option_type@01@W4201@0@Z + ??Istd@@YA?AW4_Node_flags@0@W410@0@Z + ??R@@QEBA@QEAD_KD@Z + ??R@@QEBA@QEA_WQEB_W_K12@Z + ??R@@QEBA@QEA_WQEB_W_K_W@Z + ??R@@QEBA@QEADQEBD_KD@Z + ??R@@QEBA@QEADQEBD_K12@Z + ??R@@QEBA@QEAD_KQEBD@Z + ??R@@QEBA@QEA_WQEB_W_K2_W@Z + ??R@@QEBA@QEADQEBD_K2D@Z + ??R@@QEBA@QEADQEBD_K212@Z + ??R?$_Cmp_collate@V?$regex_traits@D@std@@@std@@QEAA_NDD@Z + ??R?$_Cmp_cs@V?$regex_traits@D@std@@@std@@QEAA_NDD@Z + ??R?$_Cmp_icase@V?$_Regex_traits@D@std@@@std@@QEAA_NDD@Z + ??R?$_Cmp_icase@V?$regex_traits@D@std@@@std@@QEAA_NDD@Z + ??R?$_Conditionally_enabled_hash@H$00@std@@QEBA_KAEBH@Z + ??R?$_Wrap_istream@DU?$char_traits@D@std@@I@std@@QEAAIXZ + ??R?$default_delete@Ufalcon_file_loader@@@std@@QEBAXPEAUfalcon_file_loader@@@Z + ??R?$default_delete@Ufalcon_model_loader@@@std@@QEBAXPEAUfalcon_model_loader@@@Z + ??R?$default_delete@Ullama_file_loader@@@std@@QEBAXPEAUllama_file_loader@@@Z + ??R?$default_delete@Ullama_mmap@@@std@@QEBAXPEAUllama_mmap@@@Z + ??R?$default_delete@Ullama_model_loader@@@std@@QEBAXPEAUllama_model_loader@@@Z + ??R?$default_delete@V_Facet_base@std@@@std@@QEBAXPEAV_Facet_base@1@@Z + ??R?$default_delete@V_Node_assert@std@@@std@@QEBAXPEAV_Node_assert@1@@Z + ??R?$equal_to@H@std@@QEBA_NAEBH0@Z + ??R?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEBA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@0@Z + ??R?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEBA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@@Z + ??R?$less@H@std@@QEBA_NAEBH0@Z + ??R?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEBA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@0@Z + ??R?$less@W4e_model@@@std@@QEBA_NAEBW4e_model@@0@Z + ??R?$less@W4falcon_e_model@@@std@@QEBA_NAEBW4falcon_e_model@@0@Z + ??R?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@QEAAIXZ + ??Rcomparator@llama_sp_bigram@@QEAA_NAEAU1@0@Z + ??Sregex_constants@std@@YA?AW4match_flag_type@01@W4201@@Z + ??Tstd@@YA?AW4_Node_flags@0@W410@0@Z + ??Uregex_constants@std@@YA?AW4match_flag_type@01@W4201@0@Z + ??Ustd@@YA?AW4_Node_flags@0@W410@0@Z + ??Y?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@QEAAAEAV01@_J@Z + ??Y?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV01@AEBV01@@Z + ??Y?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV01@QEBD@Z + ??_4regex_constants@std@@YAAEAW4match_flag_type@01@AEAW4201@W4201@@Z + ??_5regex_constants@std@@YAAEAW4match_flag_type@01@AEAW4201@W4201@@Z + ??_5std@@YAAEAW4_Node_flags@0@AEAW410@W410@@Z + ??_6std@@YAAEAW4_Node_flags@0@AEAW410@W410@@Z + ??_D?$basic_ifstream@DU?$char_traits@D@std@@@std@@QEAAXXZ + ??_D?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAXXZ + ?Add@RingBuffer@@QEAAXH@Z + ?BatchEval@LLM@@QEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Clear@RingBuffer@@QEAAXXZ + ?ContextLength@LLM@@QEBAHXZ + ?Detokenize@LLM@@UEBAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@Z + ?Detokenize@falcon_llm@@UEBAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@Z + ?Detokenize@llama_llm@@UEBAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@Z + ?Detokenize@replit_llm@@UEBAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@Z + ?Embeddings@LLM@@UEBAAEBV?$vector@MV?$allocator@M@std@@@std@@XZ + ?Embeddings@falcon_llm@@UEBAAEBV?$vector@MV?$allocator@M@std@@@std@@XZ + ?Embeddings@llama_llm@@UEBAAEBV?$vector@MV?$allocator@M@std@@@std@@XZ + ?EosToken@LLM@@UEBAHXZ + ?EosToken@falcon_llm@@UEBAHXZ + ?EosToken@llama_llm@@UEBAHXZ + ?EosToken@replit_llm@@UEBAHXZ + ?Eval@dollyv2_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Eval@falcon_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Eval@gpt2_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Eval@gpt_neox_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Eval@gptj_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Eval@llama_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Eval@mpt_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Eval@replit_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?Eval@starcoder_llm@@MEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@HH@Z + ?EvalInternal@LLM@@AEAA_NAEBV?$vector@HV?$allocator@H@std@@@std@@H@Z + ?GetRecent@RingBuffer@@QEBA?AV?$unordered_set@HU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@H@2@@std@@H@Z + ?Init@LLM@@QEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Init@RingBuffer@@QEAAXH@Z + ?IsEosToken@LLM@@UEBA_NH@Z + ?IsEosToken@falcon_llm@@UEBA_NH@Z + ?IsEosToken@llama_llm@@UEBA_NH@Z + ?IsEosToken@replit_llm@@UEBA_NH@Z + ?Load@dollyv2_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Load@falcon_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Load@gpt2_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Load@gpt_neox_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Load@gptj_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Load@llama_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Load@mpt_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Load@replit_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Load@starcoder_llm@@MEAA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HH@Z + ?Logits@LLM@@UEAAAEAV?$vector@MV?$allocator@M@std@@@std@@XZ + ?Logits@falcon_llm@@UEAAAEAV?$vector@MV?$allocator@M@std@@@std@@XZ + ?Logits@llama_llm@@UEAAAEAV?$vector@MV?$allocator@M@std@@@std@@XZ + ?Reset@LLM@@QEAAXXZ + ?Sample@LLM@@UEBAHHMMMHH@Z + ?Sample@falcon_llm@@UEBAHHMMMHH@Z + ?Sample@llama_llm@@UEBAHHMMMHH@Z + ?Sample@replit_llm@@UEBAHHMMMHH@Z + ?Size@RingBuffer@@QEBAHXZ + ?Tokenize@LLM@@UEBA?AV?$vector@HV?$allocator@H@std@@@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@3@@Z + ?Tokenize@falcon_llm@@UEBA?AV?$vector@HV?$allocator@H@std@@@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@3@@Z + ?Tokenize@llama_llm@@UEBA?AV?$vector@HV?$allocator@H@std@@@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@3@@Z + ?Tokenize@replit_llm@@UEBA?AV?$vector@HV?$allocator@H@std@@@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@3@@Z + ?VocabSize@LLM@@UEBAHXZ + ?VocabSize@falcon_llm@@UEBAHXZ + ?VocabSize@llama_llm@@UEBAHXZ + ?VocabSize@replit_llm@@UEBAHXZ + ?_Add_backreference@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXI@Z + ?_Add_bol@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_Add_char@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXD@Z + ?_Add_char_to_array@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEAAXD@Z + ?_Add_char_to_bitmap@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEAAXD@Z + ?_Add_char_to_class@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXD@Z + ?_Add_class@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_Add_coll@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXPEBD0_J@Z + ?_Add_dot@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_Add_elts@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEAAXPEAV?$_Node_class@DV?$regex_traits@D@std@@@2@F_N@Z + ?_Add_eol@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_Add_equiv@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXPEBD0_J@Z + ?_Add_named_class@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXF_N@Z + ?_Add_range@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXDD@Z + ?_Add_rep@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXHH_N@Z + ?_Add_str_node@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Add_wbound@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_Adjust_manually_vector_aligned@std@@YAXAEAPEAXAEA_K@Z + ?_Adopt@_Iterator_base0@std@@QEAAXPEBX@Z + ?_Advance@?$_Vb_iter_base@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAX_K@Z + ?_Alloc_proxy@_Container_base0@std@@QEAAXAEBU_Fake_allocator@2@@Z + ?_Alloc_sentinel_and_proxy@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Alloc_sentinel_and_proxy@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Alloc_sentinel_and_proxy@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Alloc_sentinel_and_proxy@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAXXZ + ?_Alloc_sentinel_and_proxy@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAXXZ + ?_Alloc_sentinel_and_proxy@?$list@HV?$allocator@H@std@@@std@@AEAAXXZ + ?_Alloc_sentinel_and_proxy@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@AEAAXXZ + ?_Alloc_sentinel_and_proxy@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@AEAAXXZ + ?_Alloc_sentinel_and_proxy@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAXXZ + ?_Alloc_sentinel_and_proxy@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@AEAAXXZ + ?_Alloc_sentinel_and_proxy@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@AEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAAXXZ + ?_Allocate@_Default_allocate_traits@std@@SAPEAX_K@Z + ?_Alternative@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_NXZ + ?_Assign_grow@?$_Hash_vec@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEAAX_KV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@@Z + ?_Assign_grow@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@std@@QEAAX_KV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@@Z + ?_Assign_grow@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@QEAAX_KV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@@Z + ?_Assign_grow@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@std@@QEAAX_KV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@@Z + ?_Assign_grow@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@std@@QEAAX_KV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@2@@Z + ?_Assign_grow@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@std@@QEAAX_KV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@@Z + ?_At@?$_Buf@D@std@@QEBADI@Z + ?_At@?$_Circ_buf@I$0CHA@@std@@QEBAI_K@Z + ?_At@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAAAEAV?$sub_match@PEBD@2@I@Z + ?_At@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEBA?AV?$sub_match@PEBD@2@I@Z + ?_At@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAAAEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@I@Z + ?_AtomEscape@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Base@?$_Circ_buf@I$0CHA@@std@@QEBA_K_K@Z + ?_Beg_expr@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEBA_NPEAV_Node_base@2@@Z + ?_Beg_expr@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEBA_NXZ + ?_Begin_assert_group@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAPEAV_Node_base@2@_N@Z + ?_Begin_capture_group@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAPEAV_Node_base@2@I@Z + ?_Begin_group@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAPEAV_Node_base@2@XZ + ?_Begin_if@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAPEAV_Node_base@2@PEAV32@@Z + ?_Better_match@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@AEAA_NXZ + ?_Better_match@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@AEAA_NXZ + ?_Bump_erased@_Range_eraser@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?_Bump_erased@_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?_Bump_erased@_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?_Bump_erased@_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?_Bump_erased@_Range_eraser@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?_Bump_erased@_Range_eraser@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAAXXZ + ?_Buy_nonzero@?$vector@DV?$allocator@D@std@@@std@@AEAAX_K@Z + ?_Buy_nonzero@?$vector@EV?$allocator@E@std@@@std@@AEAAX_K@Z + ?_Buy_nonzero@?$vector@HV?$allocator@H@std@@@std@@AEAAX_K@Z + ?_Buy_nonzero@?$vector@IV?$allocator@I@std@@@std@@AEAAX_K@Z + ?_Buy_nonzero@?$vector@MV?$allocator@M@std@@@std@@AEAAX_K@Z + ?_Buy_nonzero@?$vector@NV?$allocator@N@std@@@std@@AEAAX_K@Z + ?_Buy_nonzero@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@AEAAX_K@Z + ?_Buy_nonzero@?$vector@_JV?$allocator@_J@std@@@std@@AEAAX_K@Z + ?_Buy_nonzero@?$vector@_KV?$allocator@_K@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@DV?$allocator@D@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@EV?$allocator@E@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@HV?$allocator@H@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@IV?$allocator@I@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@MV?$allocator@M@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@NV?$allocator@N@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@_JV?$allocator@_J@std@@@std@@AEAAX_K@Z + ?_Buy_raw@?$vector@_KV?$allocator@_K@std@@@std@@AEAAX_K@Z + ?_Cache_locale@?$_Regex_traits@D@std@@AEAAXXZ + ?_Calculate_growth@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@CA_K_K00@Z + ?_Calculate_growth@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@CA_K_K00@Z + ?_Calculate_growth@?$vector@DV?$allocator@D@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@HV?$allocator@H@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@IV?$allocator@I@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@MV?$allocator@M@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@NV?$allocator@N@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEBA_K_K@Z + ?_Calculate_growth@?$vector@_JV?$allocator@_J@std@@@std@@AEBA_K_K@Z + ?_Calculate_loop_simplicity@std@@YAXPEAV_Node_base@1@0PEAV_Node_rep@1@@Z + ?_Ceiling_of_log_2@std@@YAK_K@Z + ?_Change_array@?$vector@DV?$allocator@D@std@@@std@@AEAAXQEAD_K1@Z + ?_Change_array@?$vector@HV?$allocator@H@std@@@std@@AEAAXQEAH_K1@Z + ?_Change_array@?$vector@IV?$allocator@I@std@@@std@@AEAAXQEAI_K1@Z + ?_Change_array@?$vector@MV?$allocator@M@std@@@std@@AEAAXQEAM_K1@Z + ?_Change_array@?$vector@NV?$allocator@N@std@@@std@@AEAAXQEAN_K1@Z + ?_Change_array@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAAXQEAPEAUggml_tensor@@_K1@Z + ?_Change_array@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAXQEAU?$pair@NH@2@_K1@Z + ?_Change_array@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAXQEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@_K1@Z + ?_Change_array@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAXQEAU_Grp_t@?$_Tgt_state_t@PEBD@2@_K1@Z + ?_Change_array@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAXQEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@_K1@Z + ?_Change_array@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAXQEAU_Loop_vals_t@2@_K1@Z + ?_Change_array@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAXQEAUdollyv2_layer@@_K1@Z + ?_Change_array@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAXQEAUfalcon_layer@@_K1@Z + ?_Change_array@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAXQEAUgpt2_layer@@_K1@Z + ?_Change_array@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAXQEAUgpt_neox_layer@@_K1@Z + ?_Change_array@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAXQEAUgptj_layer@@_K1@Z + ?_Change_array@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAXQEAUllama_layer@@_K1@Z + ?_Change_array@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEAAXQEAUllama_load_tensor@@_K1@Z + ?_Change_array@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEAAXQEAUllama_load_tensor_shard@@_K1@Z + ?_Change_array@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEAAXQEAUllama_sp_bigram@@_K1@Z + ?_Change_array@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEAAXQEAUllama_sp_symbol@@_K1@Z + ?_Change_array@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAAXQEAUllama_token_data@@_K1@Z + ?_Change_array@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAXQEAUmpt_layer@@_K1@Z + ?_Change_array@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAXQEAUreplit_layer@@_K1@Z + ?_Change_array@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAXQEAUstarcoder_layer@@_K1@Z + ?_Change_array@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAXQEAUtoken_score@llama_vocab@@_K1@Z + ?_Change_array@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAAXQEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@_K1@Z + ?_Change_array@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAXQEAV?$sub_match@PEBD@2@_K1@Z + ?_Change_array@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAXQEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@_K1@Z + ?_Change_array@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEAAXQEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@_K1@Z + ?_Change_array@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEAAXQEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@_K1@Z + ?_Change_array@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAXQEAVthread@2@_K1@Z + ?_Change_array@?$vector@_JV?$allocator@_J@std@@@std@@AEAAXQEA_J_K1@Z + ?_Char_to_elts@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEAAXPEBD0_JPEAPEAU?$_Sequence@D@2@@Z + ?_CharacterClass@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_CharacterClassEscape@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_N_N@Z + ?_CharacterEscape@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_NXZ + ?_Check_C_return@std@@YAHH@Z + ?_Check_grow_by_1@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Check_grow_by_1@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Check_grow_by_1@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Check_grow_by_1@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAXXZ + ?_Check_grow_by_1@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAXXZ + ?_Check_max_size@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEBAXXZ + ?_Check_max_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBAXXZ + ?_Check_max_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBAXXZ + ?_Check_max_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEBAXXZ + ?_Check_max_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEBAXXZ + ?_Check_max_size@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEBAXXZ + ?_Check_offset@?$_String_val@U?$_Simple_types@D@std@@@std@@QEBAX_K@Z + ?_Check_rehash_required_1@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEBA_NXZ + ?_Check_rehash_required_1@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBA_NXZ + ?_Check_rehash_required_1@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBA_NXZ + ?_Check_rehash_required_1@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEBA_NXZ + ?_Check_rehash_required_1@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEBA_NXZ + ?_Check_rehash_required_1@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEBA_NXZ + ?_Clamp_suffix_size@?$_String_val@U?$_Simple_types@D@std@@@std@@QEBA_K_K0@Z + ?_ClassAtom@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA?AW4_Prs_ret@2@XZ + ?_ClassEscape@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA?AW4_Prs_ret@2@_N@Z + ?_ClassRanges@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Clear_and_reserve_geometric@?$vector@IV?$allocator@I@std@@@std@@AEAAX_K@Z + ?_Clear_and_reserve_geometric@?$vector@NV?$allocator@N@std@@@std@@AEAAX_K@Z + ?_Clear_and_reserve_geometric@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAX_K@Z + ?_Clear_and_reserve_geometric@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAX_K@Z + ?_Clearf@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@QEAAXW4match_flag_type@regex_constants@2@@Z + ?_Clearf@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@QEAAXW4match_flag_type@regex_constants@2@@Z + ?_Compat@?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEBAXAEBV12@@Z + ?_Compat@?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEBAXAEBV12@@Z + ?_Compat@?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@QEBAXAEBV12@@Z + ?_Compile@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@QEAAPEAV_Root_node@2@XZ + ?_Construct@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAXQEAD0Urandom_access_iterator_tag@2@@Z + ?_Construct@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAXQEBD0Urandom_access_iterator_tag@2@@Z + ?_Construct_lv_contents@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAXAEBV12@@Z + ?_Construct_lv_contents@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAXAEBV12@@Z + ?_Copy_assign@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAXAEBV12@U?$integral_constant@_N$0A@@2@@Z + ?_Copy_assign@?$vector@IV?$allocator@I@std@@@std@@AEAAXAEBV12@U?$integral_constant@_N$0A@@2@@Z + ?_Copy_assign@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAXAEBV12@U?$integral_constant@_N$0A@@2@@Z + ?_Copy_assign@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAXAEBV12@U?$integral_constant@_N$0A@@2@@Z + ?_Dec@?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAXXZ + ?_DecimalDigits@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_NXZ + ?_Del@?$_Buf@D@std@@QEAADXZ + ?_Desired_grow_bucket_count@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Desired_grow_bucket_count@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Desired_grow_bucket_count@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Desired_grow_bucket_count@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Desired_grow_bucket_count@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Desired_grow_bucket_count@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Destroy@?$vector@DV?$allocator@D@std@@@std@@AEAAXPEAD0@Z + ?_Destroy@?$vector@EV?$allocator@E@std@@@std@@AEAAXPEAE0@Z + ?_Destroy@?$vector@HV?$allocator@H@std@@@std@@AEAAXPEAH0@Z + ?_Destroy@?$vector@IV?$allocator@I@std@@@std@@AEAAXPEAI0@Z + ?_Destroy@?$vector@MV?$allocator@M@std@@@std@@AEAAXPEAM0@Z + ?_Destroy@?$vector@NV?$allocator@N@std@@@std@@AEAAXPEAN0@Z + ?_Destroy@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAAXPEAPEAUggml_tensor@@0@Z + ?_Destroy@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAXPEAU?$pair@NH@2@0@Z + ?_Destroy@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAXPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@0@Z + ?_Destroy@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAXPEAU_Grp_t@?$_Tgt_state_t@PEBD@2@0@Z + ?_Destroy@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAXPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@0@Z + ?_Destroy@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAXPEAU_Loop_vals_t@2@0@Z + ?_Destroy@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAXPEAUdollyv2_layer@@0@Z + ?_Destroy@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAXPEAUfalcon_layer@@0@Z + ?_Destroy@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAXPEAUgpt2_layer@@0@Z + ?_Destroy@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAXPEAUgpt_neox_layer@@0@Z + ?_Destroy@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAXPEAUgptj_layer@@0@Z + ?_Destroy@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@AEAAXPEAUllama_buffer@@0@Z + ?_Destroy@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAXPEAUllama_layer@@0@Z + ?_Destroy@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEAAXPEAUllama_load_tensor@@0@Z + ?_Destroy@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEAAXPEAUllama_load_tensor_shard@@0@Z + ?_Destroy@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEAAXPEAUllama_sp_bigram@@0@Z + ?_Destroy@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEAAXPEAUllama_sp_symbol@@0@Z + ?_Destroy@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAAXPEAUllama_token_data@@0@Z + ?_Destroy@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAXPEAUmpt_layer@@0@Z + ?_Destroy@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAXPEAUreplit_layer@@0@Z + ?_Destroy@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAXPEAUstarcoder_layer@@0@Z + ?_Destroy@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAXPEAUtoken_score@llama_vocab@@0@Z + ?_Destroy@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAAXPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@0@Z + ?_Destroy@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAXPEAV?$sub_match@PEBD@2@0@Z + ?_Destroy@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAXPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@0@Z + ?_Destroy@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEAAXPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@0@Z + ?_Destroy@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEAAXPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@0@Z + ?_Destroy@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAXPEAVthread@2@0@Z + ?_Destroy@?$vector@_JV?$allocator@_J@std@@@std@@AEAAXPEA_J0@Z + ?_Destroy@?$vector@_KV?$allocator@_K@std@@@std@@AEAAXPEA_K0@Z + ?_Destroy_node@std@@YAXPEAV_Node_base@1@0@Z + ?_Disjunction@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Do_assert_group@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAX_N@Z + ?_Do_capture_group@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Do_class@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@AEAA_NPEAV_Node_base@2@@Z + ?_Do_class@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@AEAA_NPEAV_Node_base@2@@Z + ?_Do_digits@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAHHH@Z + ?_Do_ex_class@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXW4_Meta_type@2@@Z + ?_Do_ffn@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_ND@Z + ?_Do_ffnx@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_ND@Z + ?_Do_hash@?$hash@H@std@@SA_KAEBH@Z + ?_Do_if@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@AEAA_NPEAV_Node_if@2@@Z + ?_Do_if@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@AEAA_NPEAV_Node_if@2@@Z + ?_Do_noncapture_group@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Do_rep0@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@AEAA_NPEAV_Node_rep@2@_N@Z + ?_Do_rep0@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@AEAA_NPEAV_Node_rep@2@_N@Z + ?_Do_rep@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@AEAA_NPEAV_Node_rep@2@_NH@Z + ?_Do_rep@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@AEAA_NPEAV_Node_rep@2@_NH@Z + ?_Effective_range@?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEBA?AU?$pair@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V12@@2@XZ + ?_Else_if@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXPEAV_Node_base@2@0@Z + ?_Empty@?$basic_regex@DV?$regex_traits@D@std@@@std@@QEBA_NXZ + ?_End_assert_group@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXPEAV_Node_base@2@@Z + ?_End_group@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXPEAV_Node_base@2@@Z + ?_End_pattern@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAPEAV_Root_node@2@XZ + ?_Endwrite@?$basic_filebuf@DU?$char_traits@D@std@@@std@@IEAA_NXZ + ?_Eos@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAX_K@Z + ?_Equal@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_NAEBV12@@Z + ?_Equal@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_NQEBD@Z + ?_Erase_noexcept@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAAEAV12@_K_K@Z + ?_Error@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXW4error_type@regex_constants@2@@Z + ?_Expand@?$_Buf@D@std@@AEAAXI@Z + ?_Expect@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXW4_Meta_type@2@W4error_type@regex_constants@2@@Z + ?_Extract@?$_In_place_key_extract_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@@std@@SAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@AEBU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@@Z + ?_Extract@?$_In_place_key_extract_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V12@_K@std@@SAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@AEBV32@AEB_K@Z + ?_Extract@?$_In_place_key_extract_map@W4e_model@@U?$pair@$$CBW4e_model@@_K@std@@@std@@SAAEBW4e_model@@AEBU?$pair@$$CBW4e_model@@_K@2@@Z + ?_Extract@?$_In_place_key_extract_map@W4falcon_e_model@@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@SAAEBW4falcon_e_model@@AEBU?$pair@$$CBW4falcon_e_model@@_K@2@@Z + ?_Extract@?$_In_place_key_extract_set@HH@std@@SAAEBHAEBH@Z + ?_Floor_of_log_2@std@@YAK_K@Z + ?_Fnv1a_append_bytes@std@@YA_K_KQEBE_K@Z + ?_Forced_rehash@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEAAX_K@Z + ?_Forced_rehash@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAAX_K@Z + ?_Forced_rehash@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAAX_K@Z + ?_Forced_rehash@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEAAX_K@Z + ?_Forced_rehash@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEAAX_K@Z + ?_Forced_rehash@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEAAX_K@Z + ?_Get@?$basic_regex@DV?$regex_traits@D@std@@@std@@QEBAPEAV_Root_node@2@XZ + ?_Get_bmax@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEBAIXZ + ?_Get_buffer_view@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA?AU_Buffer_view@12@XZ + ?_Get_first@?$_Compressed_pair@U?$default_delete@Ufalcon_file_loader@@@std@@PEAUfalcon_file_loader@@$00@std@@QEAAAEAU?$default_delete@Ufalcon_file_loader@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$default_delete@Ufalcon_model_loader@@@std@@PEAUfalcon_model_loader@@$00@std@@QEAAAEAU?$default_delete@Ufalcon_model_loader@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$default_delete@Ullama_file_loader@@@std@@PEAUllama_file_loader@@$00@std@@QEAAAEAU?$default_delete@Ullama_file_loader@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$default_delete@Ullama_mmap@@@std@@PEAUllama_mmap@@$00@std@@QEAAAEAU?$default_delete@Ullama_mmap@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$default_delete@Ullama_model_loader@@@std@@PEAUllama_model_loader@@$00@std@@QEAAAEAU?$default_delete@Ullama_model_loader@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$default_delete@V_Facet_base@std@@@std@@PEAV_Facet_base@2@$00@std@@QEAAAEAU?$default_delete@V_Facet_base@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$default_delete@V_Node_assert@std@@@std@@PEAV_Node_assert@2@$00@std@@QEAAAEAU?$default_delete@V_Node_assert@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$equal_to@H@std@@M$00@std@@QEBAAEBU?$equal_to@H@2@XZ + ?_Get_first@?$_Compressed_pair@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@M$00@std@@QEBAAEBU?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$hash@H@std@@V?$_Compressed_pair@U?$equal_to@H@std@@M$00@2@$00@std@@QEBAAEBU?$hash@H@2@XZ + ?_Get_first@?$_Compressed_pair@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Compressed_pair@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@M$00@2@$00@std@@QEBAAEBU?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$less@H@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@2@$00@2@$00@std@@QEBAAEBU?$less@H@2@XZ + ?_Get_first@?$_Compressed_pair@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@$00@2@$00@std@@QEBAAEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@2@$00@std@@QEBAAEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$less@W4e_model@@@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@2@$00@2@$00@std@@QEBAAEBU?$less@W4e_model@@@2@XZ + ?_Get_first@?$_Compressed_pair@U?$less@W4falcon_e_model@@@std@@V?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@2@$00@2@$00@std@@QEBAAEBU?$less@W4falcon_e_model@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@D@std@@V?$_String_val@U?$_Simple_types@D@std@@@2@$00@std@@QEAAAEAV?$allocator@D@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@D@std@@V?$_String_val@U?$_Simple_types@D@std@@@2@$00@std@@QEBAAEBV?$allocator@D@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@D@std@@V?$_Vector_val@U?$_Simple_types@D@std@@@2@$00@std@@QEAAAEAV?$allocator@D@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@D@std@@V?$_Vector_val@U?$_Simple_types@D@std@@@2@$00@std@@QEBAAEBV?$allocator@D@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@E@std@@V?$_Vector_val@U?$_Simple_types@E@std@@@2@$00@std@@QEAAAEAV?$allocator@E@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@E@std@@V?$_Vector_val@U?$_Simple_types@E@std@@@2@$00@std@@QEBAAEBV?$allocator@E@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@H@std@@V?$_Vector_val@U?$_Simple_types@H@std@@@2@$00@std@@QEAAAEAV?$allocator@H@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@H@std@@V?$_Vector_val@U?$_Simple_types@H@std@@@2@$00@std@@QEBAAEBV?$allocator@H@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@I@std@@V?$_Vector_val@U?$_Simple_types@I@std@@@2@$00@std@@QEAAAEAV?$allocator@I@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@I@std@@V?$_Vector_val@U?$_Simple_types@I@std@@@2@$00@std@@QEBAAEBV?$allocator@I@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@M@std@@V?$_Vector_val@U?$_Simple_types@M@std@@@2@$00@std@@QEAAAEAV?$allocator@M@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@M@std@@V?$_Vector_val@U?$_Simple_types@M@std@@@2@$00@std@@QEBAAEBV?$allocator@M@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@N@std@@V?$_Vector_val@U?$_Simple_types@N@std@@@2@$00@std@@QEAAAEAV?$allocator@N@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@N@std@@V?$_Vector_val@U?$_Simple_types@N@std@@@2@$00@std@@QEBAAEBV?$allocator@N@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@PEAUggml_tensor@@@std@@V?$_Vector_val@U?$_Simple_types@PEAUggml_tensor@@@std@@@2@$00@std@@QEAAAEAV?$allocator@PEAUggml_tensor@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@PEAUggml_tensor@@@std@@V?$_Vector_val@U?$_Simple_types@PEAUggml_tensor@@@std@@@2@$00@std@@QEBAAEBV?$allocator@PEAUggml_tensor@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@HPEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@H@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_List_node@HPEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@HPEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@H@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_List_node@HPEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$pair@NH@std@@@std@@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$pair@NH@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$pair@NH@std@@@std@@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$pair@NH@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U_Loop_vals_t@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Loop_vals_t@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@U_Loop_vals_t@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@U_Loop_vals_t@std@@@std@@V?$_Vector_val@U?$_Simple_types@U_Loop_vals_t@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@U_Loop_vals_t@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Udollyv2_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Udollyv2_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Udollyv2_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Udollyv2_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Udollyv2_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Udollyv2_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ufalcon_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ufalcon_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ufalcon_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ufalcon_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ufalcon_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ufalcon_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ugpt2_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugpt2_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ugpt2_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ugpt2_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugpt2_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ugpt2_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ugpt_neox_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugpt_neox_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ugpt_neox_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ugpt_neox_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugpt_neox_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ugpt_neox_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ugptj_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugptj_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ugptj_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ugptj_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ugptj_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ugptj_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_buffer@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_buffer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ullama_buffer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_buffer@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_buffer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ullama_buffer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ullama_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ullama_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_load_tensor@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_load_tensor@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ullama_load_tensor@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_load_tensor@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_load_tensor@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ullama_load_tensor@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_load_tensor_shard@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_load_tensor_shard@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ullama_load_tensor_shard@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_load_tensor_shard@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_load_tensor_shard@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ullama_load_tensor_shard@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_sp_bigram@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ullama_sp_bigram@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_sp_bigram@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ullama_sp_bigram@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_sp_symbol@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_sp_symbol@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ullama_sp_symbol@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_sp_symbol@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_sp_symbol@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ullama_sp_symbol@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_token_data@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ullama_token_data@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ullama_token_data@@@std@@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ullama_token_data@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Umpt_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Umpt_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Umpt_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Umpt_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Umpt_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Umpt_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ureplit_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ureplit_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ureplit_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ureplit_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ureplit_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ureplit_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ustarcoder_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ustarcoder_layer@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Ustarcoder_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Ustarcoder_layer@@@std@@V?$_Vector_val@U?$_Simple_types@Ustarcoder_layer@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Ustarcoder_layer@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Utoken_score@llama_vocab@@@std@@V?$_Vector_val@U?$_Simple_types@Utoken_score@llama_vocab@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Utoken_score@llama_vocab@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Utoken_score@llama_vocab@@@std@@V?$_Vector_val@U?$_Simple_types@Utoken_score@llama_vocab@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Utoken_score@llama_vocab@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$sub_match@PEBD@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$sub_match@PEBD@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$sub_match@PEBD@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$sub_match@PEBD@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$sub_match@PEBD@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$sub_match@PEBD@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@V?$_Vector_val@U?$_Simple_types@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Vthread@std@@@std@@V?$_Vector_val@U?$_Simple_types@Vthread@std@@@std@@@2@$00@std@@QEAAAEAV?$allocator@Vthread@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@Vthread@std@@@std@@V?$_Vector_val@U?$_Simple_types@Vthread@std@@@std@@@2@$00@std@@QEBAAEBV?$allocator@Vthread@std@@@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@_J@std@@V?$_Vector_val@U?$_Simple_types@_J@std@@@2@$00@std@@QEAAAEAV?$allocator@_J@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@_J@std@@V?$_Vector_val@U?$_Simple_types@_J@std@@@2@$00@std@@QEBAAEBV?$allocator@_J@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@_K@std@@V?$_Vector_val@U?$_Simple_types@_K@std@@@2@$00@std@@QEAAAEAV?$allocator@_K@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@_K@std@@V?$_Vector_val@U?$_Simple_types@_K@std@@@2@$00@std@@QEBAAEBV?$allocator@_K@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@_W@std@@V?$_String_val@U?$_Simple_types@_W@std@@@2@$00@std@@QEAAAEAV?$allocator@_W@2@XZ + ?_Get_first@?$_Compressed_pair@V?$allocator@_W@std@@V?$_String_val@U?$_Simple_types@_W@std@@@2@$00@std@@QEBAAEBV?$allocator@_W@2@XZ + ?_Get_max_bucket_size@?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@QEAAAEAMXZ + ?_Get_max_bucket_size@?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@QEBAAEBMXZ + ?_Get_max_bucket_size@?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAAAEAMXZ + ?_Get_max_bucket_size@?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEBAAEBMXZ + ?_Get_ncap@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@AEBAIXZ + ?_Get_ncap@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@AEBAIXZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEAAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEBAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEBAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAPEAV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@2@XZ + ?_Get_scary@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEBAPEBV?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@2@XZ + ?_Get_tmax@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEBAIXZ + ?_Get_traits@?$basic_regex@DV?$regex_traits@D@std@@@std@@QEBAAEBV?$regex_traits@D@2@XZ + ?_Get_value@?$_Alloc_temporary2@V?$allocator@H@std@@@std@@QEAAAEAHXZ + ?_Getal@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEAAAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@2@XZ + ?_Getal@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEBAAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@2@XZ + ?_Getal@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAAEAV?$allocator@D@2@XZ + ?_Getal@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBAAEBV?$allocator@D@2@XZ + ?_Getal@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAAEAV?$allocator@_W@2@XZ + ?_Getal@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEBAAEBV?$allocator@_W@2@XZ + ?_Getal@?$list@HV?$allocator@H@std@@@std@@AEAAAEAV?$allocator@U?$_List_node@HPEAX@std@@@2@XZ + ?_Getal@?$list@HV?$allocator@H@std@@@std@@AEBAAEBV?$allocator@U?$_List_node@HPEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@AEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@AEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@AEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@AEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@AEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@AEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@AEAAAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@2@XZ + ?_Getal@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@AEBAAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@2@XZ + ?_Getal@?$vector@DV?$allocator@D@std@@@std@@AEAAAEAV?$allocator@D@2@XZ + ?_Getal@?$vector@DV?$allocator@D@std@@@std@@AEBAAEBV?$allocator@D@2@XZ + ?_Getal@?$vector@EV?$allocator@E@std@@@std@@AEAAAEAV?$allocator@E@2@XZ + ?_Getal@?$vector@EV?$allocator@E@std@@@std@@AEBAAEBV?$allocator@E@2@XZ + ?_Getal@?$vector@HV?$allocator@H@std@@@std@@AEAAAEAV?$allocator@H@2@XZ + ?_Getal@?$vector@HV?$allocator@H@std@@@std@@AEBAAEBV?$allocator@H@2@XZ + ?_Getal@?$vector@IV?$allocator@I@std@@@std@@AEAAAEAV?$allocator@I@2@XZ + ?_Getal@?$vector@IV?$allocator@I@std@@@std@@AEBAAEBV?$allocator@I@2@XZ + ?_Getal@?$vector@MV?$allocator@M@std@@@std@@AEAAAEAV?$allocator@M@2@XZ + ?_Getal@?$vector@MV?$allocator@M@std@@@std@@AEBAAEBV?$allocator@M@2@XZ + ?_Getal@?$vector@NV?$allocator@N@std@@@std@@AEAAAEAV?$allocator@N@2@XZ + ?_Getal@?$vector@NV?$allocator@N@std@@@std@@AEBAAEBV?$allocator@N@2@XZ + ?_Getal@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAAAEAV?$allocator@PEAUggml_tensor@@@2@XZ + ?_Getal@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEBAAEBV?$allocator@PEAUggml_tensor@@@2@XZ + ?_Getal@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAAEAV?$allocator@U?$pair@NH@std@@@2@XZ + ?_Getal@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEBAAEBV?$allocator@U?$pair@NH@std@@@2@XZ + ?_Getal@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAAEAV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@XZ + ?_Getal@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEBAAEBV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@XZ + ?_Getal@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@2@XZ + ?_Getal@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEBAAEBV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@2@XZ + ?_Getal@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAAEAV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@XZ + ?_Getal@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEBAAEBV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@XZ + ?_Getal@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAAEAV?$allocator@U_Loop_vals_t@std@@@2@XZ + ?_Getal@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEBAAEBV?$allocator@U_Loop_vals_t@std@@@2@XZ + ?_Getal@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAAEAV?$allocator@Udollyv2_layer@@@2@XZ + ?_Getal@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEBAAEBV?$allocator@Udollyv2_layer@@@2@XZ + ?_Getal@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAAEAV?$allocator@Ufalcon_layer@@@2@XZ + ?_Getal@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEBAAEBV?$allocator@Ufalcon_layer@@@2@XZ + ?_Getal@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAAEAV?$allocator@Ugpt2_layer@@@2@XZ + ?_Getal@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEBAAEBV?$allocator@Ugpt2_layer@@@2@XZ + ?_Getal@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAAEAV?$allocator@Ugpt_neox_layer@@@2@XZ + ?_Getal@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEBAAEBV?$allocator@Ugpt_neox_layer@@@2@XZ + ?_Getal@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAAEAV?$allocator@Ugptj_layer@@@2@XZ + ?_Getal@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEBAAEBV?$allocator@Ugptj_layer@@@2@XZ + ?_Getal@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@AEAAAEAV?$allocator@Ullama_buffer@@@2@XZ + ?_Getal@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@AEBAAEBV?$allocator@Ullama_buffer@@@2@XZ + ?_Getal@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAAEAV?$allocator@Ullama_layer@@@2@XZ + ?_Getal@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEBAAEBV?$allocator@Ullama_layer@@@2@XZ + ?_Getal@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEAAAEAV?$allocator@Ullama_load_tensor@@@2@XZ + ?_Getal@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEBAAEBV?$allocator@Ullama_load_tensor@@@2@XZ + ?_Getal@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEAAAEAV?$allocator@Ullama_load_tensor_shard@@@2@XZ + ?_Getal@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEBAAEBV?$allocator@Ullama_load_tensor_shard@@@2@XZ + ?_Getal@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEAAAEAV?$allocator@Ullama_sp_bigram@@@2@XZ + ?_Getal@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEBAAEBV?$allocator@Ullama_sp_bigram@@@2@XZ + ?_Getal@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEAAAEAV?$allocator@Ullama_sp_symbol@@@2@XZ + ?_Getal@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEBAAEBV?$allocator@Ullama_sp_symbol@@@2@XZ + ?_Getal@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAAAEAV?$allocator@Ullama_token_data@@@2@XZ + ?_Getal@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEBAAEBV?$allocator@Ullama_token_data@@@2@XZ + ?_Getal@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAAEAV?$allocator@Umpt_layer@@@2@XZ + ?_Getal@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEBAAEBV?$allocator@Umpt_layer@@@2@XZ + ?_Getal@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAAEAV?$allocator@Ureplit_layer@@@2@XZ + ?_Getal@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEBAAEBV?$allocator@Ureplit_layer@@@2@XZ + ?_Getal@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAAEAV?$allocator@Ustarcoder_layer@@@2@XZ + ?_Getal@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEBAAEBV?$allocator@Ustarcoder_layer@@@2@XZ + ?_Getal@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAAEAV?$allocator@Utoken_score@llama_vocab@@@2@XZ + ?_Getal@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEBAAEBV?$allocator@Utoken_score@llama_vocab@@@2@XZ + ?_Getal@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAAAEAV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Getal@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEBAAEBV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Getal@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAAEAV?$allocator@V?$sub_match@PEBD@std@@@2@XZ + ?_Getal@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEBAAEBV?$allocator@V?$sub_match@PEBD@std@@@2@XZ + ?_Getal@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAAEAV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@XZ + ?_Getal@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEBAAEBV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@XZ + ?_Getal@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEAAAEAV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@XZ + ?_Getal@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEBAAEBV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@XZ + ?_Getal@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEAAAEAV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@XZ + ?_Getal@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEBAAEBV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@XZ + ?_Getal@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAAEAV?$allocator@Vthread@std@@@2@XZ + ?_Getal@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEBAAEBV?$allocator@Vthread@std@@@2@XZ + ?_Getal@?$vector@_JV?$allocator@_J@std@@@std@@AEAAAEAV?$allocator@_J@2@XZ + ?_Getal@?$vector@_JV?$allocator@_J@std@@@std@@AEBAAEBV?$allocator@_J@2@XZ + ?_Getal@?$vector@_KV?$allocator@_K@std@@@std@@AEAAAEAV?$allocator@_K@2@XZ + ?_Getal@?$vector@_KV?$allocator@_K@std@@@std@@AEBAAEBV?$allocator@_K@2@XZ + ?_Getcat@?$collate@D@std@@SA_KPEAPEBVfacet@locale@2@PEBV42@@Z + ?_Getcoll@?$_Regex_traits@D@std@@QEBAPEBV?$collate@D@2@XZ + ?_Getcomp@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@IEBAAEBU?$less@H@2@XZ + ?_Getcomp@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBAAEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Getcomp@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBAAEBU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@XZ + ?_Getcomp@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@IEBAAEBU?$less@W4e_model@@@2@XZ + ?_Getcomp@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@IEBAAEBU?$less@W4falcon_e_model@@@2@XZ + ?_Getcont@_Iterator_base0@std@@QEBAPEBU_Container_base0@2@XZ + ?_Getctype@?$_Regex_traits@D@std@@QEBAPEBV?$ctype@D@2@XZ + ?_Getfacet@locale@std@@QEBAPEBVfacet@12@_K@Z + ?_Getmark@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEBAPEAV_Node_base@2@XZ + ?_Getptr@?$_Vb_reference@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEBAPEBIXZ + ?_Getstate@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@CAHH@Z + ?_HexDigits@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXH@Z + ?_IdentityEscape@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_NXZ + ?_Inc@?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@QEAAXXZ + ?_Init@?$basic_filebuf@DU?$char_traits@D@std@@@std@@IEAAXPEAU_iobuf@@W4_Initfl@12@@Z + ?_Init@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@IEAAXPEBD_KH@Z + ?_Init@?$collate@D@std@@IEAAXAEBV_Locinfo@2@@Z + ?_Init@?$wstring_convert@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@_WV?$allocator@_W@2@V?$allocator@D@2@@std@@AEAAXPEBV?$codecvt_utf8@_W$0BAPPPP@$0A@@2@@Z + ?_Init@param_type@?$discrete_distribution@H@std@@QEAAX_N@Z + ?_Initcvt@?$basic_filebuf@DU?$char_traits@D@std@@@std@@IEAAXAEBV?$codecvt@DDU_Mbstatet@@@2@@Z + ?_Insert@?$_Buf@D@std@@QEAAXD@Z + ?_Insert_n@?$vector@_NV?$allocator@_N@std@@@std@@QEAA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@V?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@_KAEB_N@Z + ?_Insert_new_node_before@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEAAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@_KQEAU32@1@Z + ?_Insert_new_node_before@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@_KQEAU32@1@Z + ?_Insert_new_node_before@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@_KQEAU32@1@Z + ?_Insert_new_node_before@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@2@_KQEAU32@1@Z + ?_Insert_new_node_before@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@_KQEAU32@1@Z + ?_Insert_new_node_before@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEAAPEAU?$_List_node@HPEAX@2@_KQEAU32@1@Z + ?_Insert_node@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@CAXPEAV_Node_base@2@0@Z + ?_Insert_node@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@2@U?$_Tree_id@PEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@2@QEAU32@@Z + ?_Insert_node@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@U?$_Tree_id@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@QEAU32@@Z + ?_Insert_node@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@U?$_Tree_id@PEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@QEAU32@@Z + ?_Insert_node@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@U?$_Tree_id@PEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@2@QEAU32@@Z + ?_Insert_node@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@U?$_Tree_id@PEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@2@QEAU32@@Z + ?_Insert_x@?$vector@_NV?$allocator@_N@std@@@std@@QEAA_KV?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@_K@Z + ?_IsIdentityEscape@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEBA_NXZ + ?_Is_esc@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEBA_NXZ + ?_Is_wbound@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@AEBA_NXZ + ?_Is_wbound@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@AEBA_NXZ + ?_Is_word@std@@YA_ND@Z + ?_Is_word@std@@YA_NE@Z + ?_Kfn@?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@SAAEBHAEBH@Z + ?_Large_string_engaged@?$_String_val@U?$_Simple_types@D@std@@@std@@QEBA_NXZ + ?_Large_string_engaged@?$_String_val@U?$_Simple_types@_W@std@@@std@@QEBA_NXZ + ?_Link_node@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEAAPEAV_Node_base@2@PEAV32@@Z + ?_Lock@?$basic_filebuf@DU?$char_traits@D@std@@@std@@UEAAXXZ + ?_Lrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@2@@Z + ?_Lrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@@Z + ?_Lrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@@Z + ?_Lrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@@Z + ?_Lrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@@Z + ?_Make_const_iter@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEBA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@@Z + ?_Make_const_iter@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEBA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@@Z + ?_Make_iter@?$list@HV?$allocator@H@std@@@std@@QEBA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@2@PEAU?$_List_node@HPEAX@2@@Z + ?_Make_iter@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEBA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@PEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@@Z + ?_Make_iter@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEBA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@@Z + ?_Make_iter@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEBA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@PEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@@Z + ?_Make_iter@?$vector@_NV?$allocator@_N@std@@@std@@QEAA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@V?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@@Z + ?_Make_iterator@?$vector@HV?$allocator@H@std@@@std@@AEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@2@QEAH@Z + ?_Makestr@_System_error@std@@CA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@Verror_code@2@V32@@Z + ?_Maklocwcs@std@@YAPEA_WPEB_W@Z + ?_Mark_count@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@QEBAIXZ + ?_Mark_final@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_Mask@?$_Vb_reference@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@std@@IEBAIXZ + ?_Match_pat@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@AEAA_NPEAV_Node_base@2@@Z + ?_Match_pat@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@AEAA_NPEAV_Node_base@2@@Z + ?_Max@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@SAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@PEAU32@@Z + ?_Max@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@SAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@PEAU32@@Z + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEAAAEAMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEBAAEBMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAAAEAMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBAAEBMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAAAEAMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBAAEBMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEAAAEAMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEBAAEBMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEAAAEAMXZ + ?_Max_bucket_size@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEBAAEBMXZ + ?_Max_bucket_size@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEAAAEAMXZ + ?_Max_bucket_size@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEBAAEBMXZ + ?_Memcpy_val_from@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAXAEBV12@@Z + ?_Memcpy_val_from@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAXAEBV12@@Z + ?_Min@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@SAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@PEAU32@@Z + ?_Min@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@SAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@PEAU32@@Z + ?_Min_load_factor_buckets@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Min_load_factor_buckets@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Min_load_factor_buckets@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Min_load_factor_buckets@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Min_load_factor_buckets@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Min_load_factor_buckets@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEBA_K_K@Z + ?_Move_assign@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@AEAAXAEAV12@U_Equal_allocators@2@@Z + ?_Move_assign@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@AEAAXAEAV12@U_Equal_allocators@2@@Z + ?_Move_assign@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAXAEAV12@U_Equal_allocators@2@@Z + ?_Move_assign@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAXAEAV12@U_Equal_allocators@2@@Z + ?_Mymtx@_Mutex_base@std@@AEAAPEAU_Mtx_internal_imp_t@@XZ + ?_Myptr@?$_String_val@U?$_Simple_types@D@std@@@std@@QEAAPEADXZ + ?_Myptr@?$_String_val@U?$_Simple_types@D@std@@@std@@QEBAPEBDXZ + ?_Myptr@?$_String_val@U?$_Simple_types@_W@std@@@std@@QEAAPEA_WXZ + ?_Myptr@?$_String_val@U?$_Simple_types@_W@std@@@std@@QEBAPEB_WXZ + ?_Negate@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_New_node@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@AEAAPEAV_Node_base@2@W4_Node_type@2@@Z + ?_Next@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Null@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAAAEAV?$sub_match@PEBD@2@XZ + ?_Null@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAAAEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Nw@?$_Vb_val@V?$allocator@_N@std@@@std@@SA_K_K@Z + ?_OctalDigits@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_NXZ + ?_Orphan_all@_Container_base0@std@@QEAAXXZ + ?_Orphan_non_end@?$_List_val@U?$_List_simple_types@H@std@@@std@@QEAAXXZ + ?_Orphan_non_end@?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@QEAAXXZ + ?_Orphan_non_end@?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@QEAAXXZ + ?_Orphan_non_end@?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAAXXZ + ?_Orphan_non_end@?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@QEAAXXZ + ?_Orphan_non_end@?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@QEAAXXZ + ?_Orphan_range@?$vector@DV?$allocator@D@std@@@std@@AEBAXPEAD0@Z + ?_Orphan_range@?$vector@HV?$allocator@H@std@@@std@@AEBAXPEAH0@Z + ?_Orphan_range@?$vector@IV?$allocator@I@std@@@std@@AEBAXPEAI0@Z + ?_Orphan_range@?$vector@MV?$allocator@M@std@@@std@@AEBAXPEAM0@Z + ?_Orphan_range@?$vector@NV?$allocator@N@std@@@std@@AEBAXPEAN0@Z + ?_Orphan_range@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEBAXPEAPEAUggml_tensor@@0@Z + ?_Orphan_range@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEBAXPEAU?$pair@NH@2@0@Z + ?_Orphan_range@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEBAXPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@0@Z + ?_Orphan_range@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEBAXPEAU_Grp_t@?$_Tgt_state_t@PEBD@2@0@Z + ?_Orphan_range@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEBAXPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@0@Z + ?_Orphan_range@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEBAXPEAU_Loop_vals_t@2@0@Z + ?_Orphan_range@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEBAXPEAUdollyv2_layer@@0@Z + ?_Orphan_range@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEBAXPEAUfalcon_layer@@0@Z + ?_Orphan_range@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEBAXPEAUgpt2_layer@@0@Z + ?_Orphan_range@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEBAXPEAUgpt_neox_layer@@0@Z + ?_Orphan_range@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEBAXPEAUgptj_layer@@0@Z + ?_Orphan_range@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEBAXPEAUllama_layer@@0@Z + ?_Orphan_range@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEBAXPEAUllama_load_tensor@@0@Z + ?_Orphan_range@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEBAXPEAUllama_load_tensor_shard@@0@Z + ?_Orphan_range@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEBAXPEAUllama_sp_bigram@@0@Z + ?_Orphan_range@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEBAXPEAUllama_sp_symbol@@0@Z + ?_Orphan_range@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEBAXPEAUllama_token_data@@0@Z + ?_Orphan_range@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEBAXPEAUmpt_layer@@0@Z + ?_Orphan_range@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEBAXPEAUreplit_layer@@0@Z + ?_Orphan_range@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEBAXPEAUstarcoder_layer@@0@Z + ?_Orphan_range@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEBAXPEAUtoken_score@llama_vocab@@0@Z + ?_Orphan_range@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEBAXPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@0@Z + ?_Orphan_range@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEBAXPEAV?$sub_match@PEBD@2@0@Z + ?_Orphan_range@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEBAXPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@0@Z + ?_Orphan_range@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEBAXPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@0@Z + ?_Orphan_range@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEBAXPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@0@Z + ?_Orphan_range@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEBAXPEAVthread@2@0@Z + ?_Orphan_range@?$vector@_JV?$allocator@_J@std@@@std@@AEBAXPEA_J0@Z + ?_Pfx@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAAAEAV?$sub_match@PEBD@2@XZ + ?_Pfx@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAAAEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Pocma_both@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@AEAAXAEAV12@@Z + ?_Pocma_both@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@AEAAXAEAV12@@Z + ?_Quantifier@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Reallocate_exactly@?$vector@MV?$allocator@M@std@@@std@@AEAAX_K@Z + ?_Reallocate_exactly@?$vector@NV?$allocator@N@std@@@std@@AEAAX_K@Z + ?_Reallocate_exactly@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAX_K@Z + ?_Reallocate_exactly@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAAX_K@Z + ?_Refill_lower@?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@IEAAXXZ + ?_Refill_upper@?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@IEAAXXZ + ?_Rehash_for_1@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Rehash_for_1@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Rehash_for_1@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Rehash_for_1@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Rehash_for_1@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@IEAAXXZ + ?_Rehash_for_1@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@IEAAXXZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@QEAAPEAU?$_List_node@HPEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@XZ + ?_Release@?$_Alloc_construct_ptr@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@XZ + ?_Release@?$_Uninitialized_backout@PEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@QEAAPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@XZ + ?_Release@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout@PEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@D@std@@@std@@QEAAPEADXZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@E@std@@@std@@QEAAPEAEXZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@H@std@@@std@@QEAAPEAHXZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@I@std@@@std@@QEAAPEAIXZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@M@std@@@std@@QEAAPEAMXZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@N@std@@@std@@QEAAPEANXZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAAPEAPEAUggml_tensor@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@U?$pair@NH@std@@@std@@@std@@QEAAPEAU?$pair@NH@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAAPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@QEAAPEAU_Grp_t@?$_Tgt_state_t@PEBD@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@QEAAPEAU_Loop_vals_t@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAAPEAUdollyv2_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAAPEAUfalcon_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAAPEAUgpt2_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAAPEAUgpt_neox_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ugptj_layer@@@std@@@std@@QEAAPEAUgptj_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ullama_buffer@@@std@@@std@@QEAAPEAUllama_buffer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ullama_layer@@@std@@@std@@QEAAPEAUllama_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAAPEAUllama_load_tensor@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAAPEAUllama_load_tensor_shard@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAAPEAUllama_sp_bigram@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEAAPEAUllama_sp_symbol@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ullama_token_data@@@std@@@std@@QEAAPEAUllama_token_data@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Umpt_layer@@@std@@@std@@QEAAPEAUmpt_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ureplit_layer@@@std@@@std@@QEAAPEAUreplit_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAAPEAUstarcoder_layer@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAAPEAUtoken_score@llama_vocab@@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@QEAAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAAPEAV?$sub_match@PEBD@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@QEAAPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@QEAAPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@Vthread@std@@@std@@@std@@QEAAPEAVthread@2@XZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@_J@std@@@std@@QEAAPEA_JXZ + ?_Release@?$_Uninitialized_backout_al@V?$allocator@_K@std@@@std@@QEAAPEA_KXZ + ?_Release@_Fake_proxy_ptr_impl@std@@QEAAXXZ + ?_Reset@?$basic_regex@DV?$regex_traits@D@std@@@std@@AEAAXPEAV_Root_node@2@@Z + ?_Reset_back@?$basic_filebuf@DU?$char_traits@D@std@@@std@@AEAAXXZ + ?_Resize@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAAXI@Z + ?_Resize@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAAXI@Z + ?_Rrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@2@@Z + ?_Rrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@@Z + ?_Rrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@@Z + ?_Rrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4e_model@@_K@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@@Z + ?_Rrotate@?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@@std@@QEAAXPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@@Z + ?_Seek_to@?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEAAXPEBD@Z + ?_Seek_to@?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@QEAAXPEBN@Z + ?_Set_back@?$basic_filebuf@DU?$char_traits@D@std@@@std@@AEAAXXZ + ?_Setf@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@QEAAXW4match_flag_type@regex_constants@2@@Z + ?_Setf@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@QEAAXW4match_flag_type@regex_constants@2@@Z + ?_Setlong@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_Sfx@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEAAAEAV?$sub_match@PEBD@2@XZ + ?_Sfx@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAAAEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Size@?$_Buf@D@std@@QEBAIXZ + ?_Skip@?$_Matcher@PEBDDV?$regex_traits@D@std@@PEBD@std@@QEAAPEBDPEBD0PEAV_Node_base@2@@Z + ?_Skip@?$_Matcher@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@DV?$regex_traits@D@2@V12@@std@@QEAA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@2@V32@0PEAV_Node_base@2@@Z + ?_Str@?$_Buf@D@std@@QEBAPEBDXZ + ?_Swap_proxy_and_iterators@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAXAEAV12@@Z + ?_Swap_proxy_and_iterators@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAXAEAV12@@Z + ?_Swap_proxy_and_iterators@_Container_base0@std@@QEAAXAEAU12@@Z + ?_Swap_val@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@AEAAXAEAV12@@Z + ?_Swap_val@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@AEAAXAEAV12@@Z + ?_Swap_val@?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEAAXAEAV12@@Z + ?_Swap_val@?$_Vector_val@U?$_Simple_types@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@QEAAXAEAV12@@Z + ?_Swap_val@?$list@HV?$allocator@H@std@@@std@@AEAAXAEAV12@@Z + ?_Swap_val@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@AEAAXAEAV12@@Z + ?_Take_contents@?$_Vector_val@U?$_Simple_types@Utoken_score@llama_vocab@@@std@@@std@@QEAAXAEAV12@@Z + ?_Take_contents@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAXAEAV12@@Z + ?_Take_contents@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAXAEAV12@@Z + ?_Throw_bad_array_new_length@std@@YAXXZ + ?_Throw_bad_cast@std@@YAXXZ + ?_Throw_range_error@std@@YAXQEBD@Z + ?_Throw_system_error@std@@YAXW4errc@1@@Z + ?_Throw_tree_length_error@std@@YAXXZ + ?_Tidy@?$_Builder@PEBDDV?$regex_traits@D@std@@@std@@QEAAXXZ + ?_Tidy@?$_Hash_vec@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEAAXXZ + ?_Tidy@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@std@@QEAAXXZ + ?_Tidy@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@QEAAXXZ + ?_Tidy@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@std@@QEAAXXZ + ?_Tidy@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@std@@QEAAXXZ + ?_Tidy@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@std@@QEAAXXZ + ?_Tidy@?$_Node_class@DV?$regex_traits@D@std@@@std@@QEAAXPEAU?$_Sequence@D@2@@Z + ?_Tidy@?$basic_regex@DV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Tidy@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@IEAAXXZ + ?_Tidy@?$list@HV?$allocator@H@std@@@std@@AEAAXXZ + ?_Tidy@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@DV?$allocator@D@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@EV?$allocator@E@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@HV?$allocator@H@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@IV?$allocator@I@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@MV?$allocator@M@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@NV?$allocator@N@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAXXZ + ?_Tidy@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAXXZ + ?_Tidy@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAXXZ + ?_Tidy@?$vector@_JV?$allocator@_J@std@@@std@@AEAAXXZ + ?_Tidy@?$vector@_KV?$allocator@_K@std@@@std@@AEAAXXZ + ?_Tidy_deallocate@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAXXZ + ?_Tidy_deallocate@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAXXZ + ?_Tidy_init@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAAXXZ + ?_Tidy_init@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEAAXXZ + ?_Trans@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAAXXZ + ?_Trim@?$vector@_NV?$allocator@_N@std@@@std@@QEAAX_K@Z + ?_Ufill@?$vector@DV?$allocator@D@std@@@std@@AEAAPEADPEAD_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@EV?$allocator@E@std@@@std@@AEAAPEAEPEAE_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@HV?$allocator@H@std@@@std@@AEAAPEAHPEAH_KAEBH@Z + ?_Ufill@?$vector@IV?$allocator@I@std@@@std@@AEAAPEAIPEAI_KAEBI@Z + ?_Ufill@?$vector@IV?$allocator@I@std@@@std@@AEAAPEAIPEAI_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@MV?$allocator@M@std@@@std@@AEAAPEAMPEAM_KAEBM@Z + ?_Ufill@?$vector@MV?$allocator@M@std@@@std@@AEAAPEAMPEAM_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@NV?$allocator@N@std@@@std@@AEAAPEANPEAN_KAEBN@Z + ?_Ufill@?$vector@NV?$allocator@N@std@@@std@@AEAAPEANPEAN_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAPEAU?$pair@NH@2@PEAU32@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAPEAU_Grp_t@?$_Tgt_state_t@PEBD@2@PEAU342@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@PEAU342@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAPEAU_Loop_vals_t@2@PEAU32@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAPEAUdollyv2_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAPEAUfalcon_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAPEAUgpt2_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAPEAUgpt_neox_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAPEAUgptj_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@AEAAPEAUllama_buffer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAPEAUllama_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAPEAUmpt_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAPEAUreplit_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAPEAUstarcoder_layer@@PEAU3@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAPEAUtoken_score@llama_vocab@@PEAU34@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAPEAV?$sub_match@PEBD@2@PEAV32@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@PEAV32@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAPEAVthread@2@PEAV32@_KU_Value_init_tag@2@@Z + ?_Ufill@?$vector@_JV?$allocator@_J@std@@@std@@AEAAPEA_JPEA_J_KAEB_J@Z + ?_Ufill@?$vector@_KV?$allocator@_K@std@@@std@@AEAAPEA_KPEA_K_KU_Value_init_tag@2@@Z + ?_Umove@?$vector@HV?$allocator@H@std@@@std@@AEAAPEAHPEAH00@Z + ?_Umove@?$vector@MV?$allocator@M@std@@@std@@AEAAPEAMPEAM00@Z + ?_Umove@?$vector@NV?$allocator@N@std@@@std@@AEAAPEANPEAN00@Z + ?_Umove@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAAPEAPEAUggml_tensor@@PEAPEAU3@00@Z + ?_Umove@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAPEAU?$pair@NH@2@PEAU32@00@Z + ?_Umove@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@PEAU32@00@Z + ?_Umove@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEAAPEAUllama_load_tensor@@PEAU3@00@Z + ?_Umove@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEAAPEAUllama_load_tensor_shard@@PEAU3@00@Z + ?_Umove@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEAAPEAUllama_sp_bigram@@PEAU3@00@Z + ?_Umove@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEAAPEAUllama_sp_symbol@@PEAU3@00@Z + ?_Umove@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAAPEAUllama_token_data@@PEAU3@00@Z + ?_Umove@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@PEAV32@00@Z + ?_Umove@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEAAPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@PEAV32@00@Z + ?_Umove@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEAAPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@PEAV32@00@Z + ?_Umove@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAPEAVthread@2@PEAV32@00@Z + ?_Umove_if_noexcept1@?$vector@DV?$allocator@D@std@@@std@@AEAAXPEAD00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@HV?$allocator@H@std@@@std@@AEAAXPEAH00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@IV?$allocator@I@std@@@std@@AEAAXPEAI00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@MV?$allocator@M@std@@@std@@AEAAXPEAM00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@NV?$allocator@N@std@@@std@@AEAAXPEAN00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAAXPEAPEAUggml_tensor@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAXPEAU?$pair@NH@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAXPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAXPEAU_Grp_t@?$_Tgt_state_t@PEBD@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAXPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAXPEAU_Loop_vals_t@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAXPEAUdollyv2_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAXPEAUfalcon_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAXPEAUgpt2_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAXPEAUgpt_neox_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAXPEAUgptj_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAXPEAUllama_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEAAXPEAUllama_load_tensor@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEAAXPEAUllama_load_tensor_shard@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEAAXPEAUllama_sp_bigram@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEAAXPEAUllama_sp_symbol@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAAXPEAUllama_token_data@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAXPEAUmpt_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAXPEAUreplit_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAXPEAUstarcoder_layer@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAXPEAUtoken_score@llama_vocab@@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAAXPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAXPEAV?$sub_match@PEBD@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAXPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEAAXPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEAAXPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAXPEAVthread@2@00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept1@?$vector@_JV?$allocator@_J@std@@@std@@AEAAXPEA_J00U?$integral_constant@_N$00@2@@Z + ?_Umove_if_noexcept@?$vector@DV?$allocator@D@std@@@std@@AEAAXPEAD00@Z + ?_Umove_if_noexcept@?$vector@HV?$allocator@H@std@@@std@@AEAAXPEAH00@Z + ?_Umove_if_noexcept@?$vector@IV?$allocator@I@std@@@std@@AEAAXPEAI00@Z + ?_Umove_if_noexcept@?$vector@MV?$allocator@M@std@@@std@@AEAAXPEAM00@Z + ?_Umove_if_noexcept@?$vector@NV?$allocator@N@std@@@std@@AEAAXPEAN00@Z + ?_Umove_if_noexcept@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAAXPEAPEAUggml_tensor@@00@Z + ?_Umove_if_noexcept@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@AEAAXPEAU?$pair@NH@2@00@Z + ?_Umove_if_noexcept@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@AEAAXPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@00@Z + ?_Umove_if_noexcept@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@AEAAXPEAU_Grp_t@?$_Tgt_state_t@PEBD@2@00@Z + ?_Umove_if_noexcept@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@AEAAXPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@00@Z + ?_Umove_if_noexcept@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@AEAAXPEAU_Loop_vals_t@2@00@Z + ?_Umove_if_noexcept@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@AEAAXPEAUdollyv2_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@AEAAXPEAUfalcon_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@AEAAXPEAUgpt2_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@AEAAXPEAUgpt_neox_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@AEAAXPEAUgptj_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@AEAAXPEAUllama_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@AEAAXPEAUllama_load_tensor@@00@Z + ?_Umove_if_noexcept@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@AEAAXPEAUllama_load_tensor_shard@@00@Z + ?_Umove_if_noexcept@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@AEAAXPEAUllama_sp_bigram@@00@Z + ?_Umove_if_noexcept@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@AEAAXPEAUllama_sp_symbol@@00@Z + ?_Umove_if_noexcept@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@AEAAXPEAUllama_token_data@@00@Z + ?_Umove_if_noexcept@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@AEAAXPEAUmpt_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@AEAAXPEAUreplit_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@AEAAXPEAUstarcoder_layer@@00@Z + ?_Umove_if_noexcept@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@AEAAXPEAUtoken_score@llama_vocab@@00@Z + ?_Umove_if_noexcept@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@AEAAXPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@00@Z + ?_Umove_if_noexcept@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@AEAAXPEAV?$sub_match@PEBD@2@00@Z + ?_Umove_if_noexcept@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@AEAAXPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@00@Z + ?_Umove_if_noexcept@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@AEAAXPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@00@Z + ?_Umove_if_noexcept@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@AEAAXPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@00@Z + ?_Umove_if_noexcept@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@AEAAXPEAVthread@2@00@Z + ?_Umove_if_noexcept@?$vector@_JV?$allocator@_J@std@@@std@@AEAAXPEA_J00@Z + ?_Unchecked_begin@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@XZ + ?_Unchecked_begin@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEBA?AV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@XZ + ?_Unchecked_begin@?$list@HV?$allocator@H@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@2@XZ + ?_Unchecked_begin@?$list@HV?$allocator@H@std@@@std@@QEBA?AV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@XZ + ?_Unchecked_begin@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEBAPEBV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$vector@HV?$allocator@H@std@@@std@@QEBAPEBHXZ + ?_Unchecked_begin@?$vector@IV?$allocator@I@std@@@std@@QEBAPEBIXZ + ?_Unchecked_begin@?$vector@MV?$allocator@M@std@@@std@@QEAAPEAMXZ + ?_Unchecked_begin@?$vector@NV?$allocator@N@std@@@std@@QEAAPEANXZ + ?_Unchecked_begin@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAAPEAPEAUggml_tensor@@XZ + ?_Unchecked_begin@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAAPEAU?$pair@NH@2@XZ + ?_Unchecked_begin@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAAPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@XZ + ?_Unchecked_begin@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@QEAAPEAUllama_buffer@@XZ + ?_Unchecked_begin@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAAPEAUllama_load_tensor@@XZ + ?_Unchecked_begin@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEBAPEBUllama_load_tensor@@XZ + ?_Unchecked_begin@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAAPEAUllama_load_tensor_shard@@XZ + ?_Unchecked_begin@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEBAPEBUllama_load_tensor_shard@@XZ + ?_Unchecked_begin@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?_Unchecked_begin@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEBAPEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?_Unchecked_begin@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEBAPEBV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Unchecked_begin@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAAPEAVthread@2@XZ + ?_Unchecked_end@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA?AV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@XZ + ?_Unchecked_end@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEBA?AV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@XZ + ?_Unchecked_end@?$list@HV?$allocator@H@std@@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@2@XZ + ?_Unchecked_end@?$list@HV?$allocator@H@std@@@std@@QEBA?AV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@XZ + ?_Unchecked_end@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAA?AV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEBAPEBV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$vector@HV?$allocator@H@std@@@std@@QEBAPEBHXZ + ?_Unchecked_end@?$vector@IV?$allocator@I@std@@@std@@QEBAPEBIXZ + ?_Unchecked_end@?$vector@MV?$allocator@M@std@@@std@@QEAAPEAMXZ + ?_Unchecked_end@?$vector@NV?$allocator@N@std@@@std@@QEAAPEANXZ + ?_Unchecked_end@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAAPEAPEAUggml_tensor@@XZ + ?_Unchecked_end@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAAPEAU?$pair@NH@2@XZ + ?_Unchecked_end@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAAPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@XZ + ?_Unchecked_end@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@QEAAPEAUllama_buffer@@XZ + ?_Unchecked_end@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAAPEAUllama_load_tensor@@XZ + ?_Unchecked_end@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEBAPEBUllama_load_tensor@@XZ + ?_Unchecked_end@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAAPEAUllama_load_tensor_shard@@XZ + ?_Unchecked_end@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEBAPEBUllama_load_tensor_shard@@XZ + ?_Unchecked_end@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?_Unchecked_end@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEBAPEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?_Unchecked_end@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEBAPEBV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?_Unchecked_end@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAAPEAVthread@2@XZ + ?_Unchecked_erase@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@AEAAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@PEAU32@QEAU32@@Z + ?_Unchecked_erase@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@AEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@PEAU32@QEAU32@@Z + ?_Unchecked_erase@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@AEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@PEAU32@QEAU32@@Z + ?_Unchecked_erase@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@AEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@2@PEAU32@QEAU32@@Z + ?_Unchecked_erase@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@AEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@PEAU32@QEAU32@@Z + ?_Unchecked_erase@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@AEAAPEAU?$_List_node@HPEAX@2@PEAU32@QEAU32@@Z + ?_Unchecked_splice@?$_List_val@U?$_List_simple_types@H@std@@@std@@SAPEAU?$_List_node@HPEAX@2@QEAU32@00@Z + ?_Unchecked_splice@?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@SAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@QEAU32@00@Z + ?_Unchecked_splice@?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@SAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@QEAU32@00@Z + ?_Unchecked_splice@?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@SAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@QEAU32@00@Z + ?_Unchecked_splice@?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@SAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@2@QEAU32@00@Z + ?_Unchecked_splice@?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@SAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@QEAU32@00@Z + ?_Unlock@?$basic_filebuf@DU?$char_traits@D@std@@@std@@UEAAXXZ + ?_Unwrapped@?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@std@@QEBA?AV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@XZ + ?_Unwrapped@?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEBAPEBDXZ + ?_Unwrapped@?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@QEBAPEADXZ + ?_Unwrapped@?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@QEBAPEBHXZ + ?_Unwrapped@?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@QEBAPEBNXZ + ?_Unwrapped@?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@std@@QEBAPEAMXZ + ?_Unwrapped@?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@std@@QEBAPEANXZ + ?_Unwrapped@?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@QEBAPEAU?$pair@NH@2@XZ + ?_Unwrapped@?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@std@@QEBAPEAUllama_sp_bigram@@XZ + ?_Unwrapped@?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@std@@QEBAPEAUllama_token_data@@XZ + ?_Unwrapped@?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@std@@QEBAPEA_KXZ + ?_Verify_offset@?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@std@@QEBAX_J@Z + ?_Verify_offset@?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@std@@QEBAX_J@Z + ?_Verify_offset@?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@std@@QEBAX_J@Z + ?_Wrapped_disjunction@?$_Parser@PEBDDV?$regex_traits@D@std@@@std@@AEAA_NXZ + ?_Xlen@?$vector@_NV?$allocator@_N@std@@@std@@QEBAXXZ + ?_Xlen_string@std@@YAXXZ + ?_Xlength@?$vector@DV?$allocator@D@std@@@std@@CAXXZ + ?_Xlength@?$vector@EV?$allocator@E@std@@@std@@CAXXZ + ?_Xlength@?$vector@HV?$allocator@H@std@@@std@@CAXXZ + ?_Xlength@?$vector@IV?$allocator@I@std@@@std@@CAXXZ + ?_Xlength@?$vector@MV?$allocator@M@std@@@std@@CAXXZ + ?_Xlength@?$vector@NV?$allocator@N@std@@@std@@CAXXZ + ?_Xlength@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@CAXXZ + ?_Xlength@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@CAXXZ + ?_Xlength@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@CAXXZ + ?_Xlength@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@CAXXZ + ?_Xlength@?$vector@_JV?$allocator@_J@std@@@std@@CAXXZ + ?_Xlength@?$vector@_KV?$allocator@_K@std@@@std@@CAXXZ + ?_Xran@?$_String_val@U?$_Simple_types@D@std@@@std@@SAXXZ + ?_Xrange@?$vector@IV?$allocator@I@std@@@std@@CAXXZ + ?_Xrange@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@CAXXZ + ?_Xrange@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@CAXXZ + ?_Xrange@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@CAXXZ + ?_Xrange@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@CAXXZ + ?_Xrange@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@CAXXZ + ?_Xrange@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@CAXXZ + ?abs@@YAMM@Z + ?add_special_token@gpt_vocab@@QEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z + ?alignment_prevents_mmap@falcon_model_loader@@QEAA_NXZ + ?alignment_prevents_mmap@llama_model_loader@@QEAA_NXZ + ?allocate@?$allocator@D@std@@QEAAPEAD_K@Z + ?allocate@?$allocator@E@std@@QEAAPEAE_K@Z + ?allocate@?$allocator@H@std@@QEAAPEAH_K@Z + ?allocate@?$allocator@I@std@@QEAAPEAI_K@Z + ?allocate@?$allocator@M@std@@QEAAPEAM_K@Z + ?allocate@?$allocator@N@std@@QEAAPEAN_K@Z + ?allocate@?$allocator@PEAUggml_tensor@@@std@@QEAAPEAPEAUggml_tensor@@_K@Z + ?allocate@?$allocator@U?$_List_node@HPEAX@std@@@std@@QEAAPEAU?$_List_node@HPEAX@2@_K@Z + ?allocate@?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@QEAAPEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@QEAAPEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@_K@Z + ?allocate@?$allocator@U?$pair@NH@std@@@std@@QEAAPEAU?$pair@NH@2@_K@Z + ?allocate@?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@QEAAPEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@_K@Z + ?allocate@?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@QEAAPEAU_Grp_t@?$_Tgt_state_t@PEBD@2@_K@Z + ?allocate@?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@QEAAPEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@_K@Z + ?allocate@?$allocator@U_Loop_vals_t@std@@@std@@QEAAPEAU_Loop_vals_t@2@_K@Z + ?allocate@?$allocator@Udollyv2_layer@@@std@@QEAAPEAUdollyv2_layer@@_K@Z + ?allocate@?$allocator@Ufalcon_layer@@@std@@QEAAPEAUfalcon_layer@@_K@Z + ?allocate@?$allocator@Ugpt2_layer@@@std@@QEAAPEAUgpt2_layer@@_K@Z + ?allocate@?$allocator@Ugpt_neox_layer@@@std@@QEAAPEAUgpt_neox_layer@@_K@Z + ?allocate@?$allocator@Ugptj_layer@@@std@@QEAAPEAUgptj_layer@@_K@Z + ?allocate@?$allocator@Ullama_buffer@@@std@@QEAAPEAUllama_buffer@@_K@Z + ?allocate@?$allocator@Ullama_layer@@@std@@QEAAPEAUllama_layer@@_K@Z + ?allocate@?$allocator@Ullama_load_tensor@@@std@@QEAAPEAUllama_load_tensor@@_K@Z + ?allocate@?$allocator@Ullama_load_tensor_shard@@@std@@QEAAPEAUllama_load_tensor_shard@@_K@Z + ?allocate@?$allocator@Ullama_sp_bigram@@@std@@QEAAPEAUllama_sp_bigram@@_K@Z + ?allocate@?$allocator@Ullama_sp_symbol@@@std@@QEAAPEAUllama_sp_symbol@@_K@Z + ?allocate@?$allocator@Ullama_token_data@@@std@@QEAAPEAUllama_token_data@@_K@Z + ?allocate@?$allocator@Umpt_layer@@@std@@QEAAPEAUmpt_layer@@_K@Z + ?allocate@?$allocator@Ureplit_layer@@@std@@QEAAPEAUreplit_layer@@_K@Z + ?allocate@?$allocator@Ustarcoder_layer@@@std@@QEAAPEAUstarcoder_layer@@_K@Z + ?allocate@?$allocator@Utoken_score@llama_vocab@@@std@@QEAAPEAUtoken_score@llama_vocab@@_K@Z + ?allocate@?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@QEAAPEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@_K@Z + ?allocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@_K@Z + ?allocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@_K@Z + ?allocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@_K@Z + ?allocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@2@_K@Z + ?allocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@_K@Z + ?allocate@?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAAPEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@_K@Z + ?allocate@?$allocator@V?$sub_match@PEBD@std@@@std@@QEAAPEAV?$sub_match@PEBD@2@_K@Z + ?allocate@?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@QEAAPEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@_K@Z + ?allocate@?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@QEAAPEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@_K@Z + ?allocate@?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@QEAAPEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@_K@Z + ?allocate@?$allocator@Vthread@std@@@std@@QEAAPEAVthread@2@_K@Z + ?allocate@?$allocator@_J@std@@QEAAPEA_J_K@Z + ?allocate@?$allocator@_K@std@@QEAAPEA_K_K@Z + ?allocate@?$allocator@_W@std@@QEAAPEA_W_K@Z + ?append@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@AEBV12@@Z + ?append@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@AEBV12@_K_K@Z + ?append@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@QEBD@Z + ?append@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@QEBD_K@Z + ?append@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@_KD@Z + ?append@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEAAAEAV12@QEB_W_K@Z + ?append@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEAAAEAV12@_K_W@Z + ?assign@?$_Narrow_char_traits@DH@std@@SAPEADQEAD_KD@Z + ?assign@?$_Narrow_char_traits@DH@std@@SAXAEADAEBD@Z + ?assign@?$_WChar_traits@_W@std@@SAPEA_WQEA_W_K_W@Z + ?assign@?$_WChar_traits@_W@std@@SAXAEA_WAEB_W@Z + ?assign@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@AEBV12@_K_K@Z + ?assign@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@QEBD@Z + ?assign@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@QEBD_K@Z + ?assign@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@_KD@Z + ?assign@?$vector@NV?$allocator@N@std@@@std@@QEAAX_KAEBN@Z + ?at@?$map@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@@std@@QEBAAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@AEBH@Z + ?at@?$map@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@@std@@QEBAAEB_KAEBW4e_model@@@Z + ?at@?$map@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@@std@@QEBAAEB_KAEBW4falcon_e_model@@@Z + ?at@?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEBAAEBU?$pair@_KM@2@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?at@?$vector@IV?$allocator@I@std@@@std@@QEAAAEAI_K@Z + ?at@?$vector@IV?$allocator@I@std@@@std@@QEBAAEBI_K@Z + ?at@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@QEAAAEAUllama_buffer@@_K@Z + ?at@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEAAAEAUllama_load_tensor@@_K@Z + ?at@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEBAAEBUllama_load_tensor@@_K@Z + ?at@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAAAEAUllama_load_tensor_shard@@_K@Z + ?at@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEBAAEBUllama_load_tensor_shard@@_K@Z + ?at@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAAAEAUtoken_score@llama_vocab@@_K@Z + ?at@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@QEAAAEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@_K@Z + ?at@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@QEBAAEBV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@_K@Z + ?at@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@QEAAAEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@_K@Z + ?at@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@QEBAAEBV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@_K@Z + ?back@?$vector@HV?$allocator@H@std@@@std@@QEAAAEAHXZ + ?back@?$vector@MV?$allocator@M@std@@@std@@QEAAAEAMXZ + ?begin@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@2@XZ + ?begin@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA?AV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@2@XZ + ?begin@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@2@XZ + ?begin@?$initializer_list@H@std@@QEBAPEBHXZ + ?begin@?$initializer_list@I@std@@QEBAPEBIXZ + ?begin@?$initializer_list@PEBD@std@@QEBAPEBQEBDXZ + ?begin@?$initializer_list@U?$pair@$$CBW4e_model@@_K@std@@@std@@QEBAPEBU?$pair@$$CBW4e_model@@_K@2@XZ + ?begin@?$initializer_list@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@QEBAPEBU?$pair@$$CBW4falcon_e_model@@_K@2@XZ + ?begin@?$list@HV?$allocator@H@std@@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@2@XZ + ?begin@?$vector@HV?$allocator@H@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@2@XZ + ?begin@?$vector@HV?$allocator@H@std@@@std@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@2@XZ + ?begin@?$vector@IV?$allocator@I@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@2@XZ + ?begin@?$vector@MV?$allocator@M@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@2@XZ + ?begin@?$vector@NV?$allocator@N@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@2@XZ + ?begin@?$vector@NV?$allocator@N@std@@@std@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@2@XZ + ?begin@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@2@XZ + ?begin@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@2@XZ + ?begin@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@2@XZ + ?begin@?$vector@_KV?$allocator@_K@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@2@XZ + ?begin@?$vector@_NV?$allocator@_N@std@@@std@@QEAA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@XZ + ?bucket@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEBA_KAEBH@Z + ?bucket@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEBA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?bucket@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEBA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?bucket@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEBA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?bucket@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEBA_KAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?bucket@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEBA_KAEBH@Z + ?bucket_count@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?bucket_count@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?bucket_count@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?bucket_count@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?bucket_count@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?bucket_count@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEBA_KXZ + ?c_str@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBAPEBDXZ + ?c_str@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEBAPEB_WXZ + ?calc_all@llama_load_tensor@@QEAAXXZ + ?calc_ne@llama_load_tensor@@QEAAXXZ + ?calc_size@llama_load_tensor@@QEAAXXZ + ?calc_size@llama_load_tensor_shard@@QEAAXXZ + ?calc_sizes@falcon_model_loader@@QEBAXPEA_K0@Z + ?calc_sizes@llama_model_loader@@QEBAXPEA_K0@Z + ?calc_split_type@llama_load_tensor@@QEAAXXZ + ?calc_type@llama_load_tensor@@QEAAXXZ + ?calculate_layer_vram_bytes@@YA_KAEBUfalcon_layer@@@Z + ?capacity@?$vector@DV?$allocator@D@std@@@std@@QEBA_KXZ + ?capacity@?$vector@HV?$allocator@H@std@@@std@@QEBA_KXZ + ?capacity@?$vector@IV?$allocator@I@std@@@std@@QEBA_KXZ + ?capacity@?$vector@MV?$allocator@M@std@@@std@@QEBA_KXZ + ?capacity@?$vector@NV?$allocator@N@std@@@std@@QEBA_KXZ + ?capacity@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEBA_KXZ + ?capacity@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEBA_KXZ + ?capacity@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEBA_KXZ + ?capacity@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEBA_KXZ + ?capacity@?$vector@_JV?$allocator@_J@std@@@std@@QEBA_KXZ + ?category@error_code@std@@QEBAAEBVerror_category@2@XZ + ?category@error_condition@std@@QEBAAEBVerror_category@2@XZ + ?clear@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?clear@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?clear@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?clear@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?clear@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAAXXZ + ?clear@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAAXXZ + ?clear@?$list@HV?$allocator@H@std@@@std@@QEAAXXZ + ?clear@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAAXXZ + ?clear@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEAAXXZ + ?clear@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAAXXZ + ?clear@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEAAXXZ + ?clear@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAAXXZ + ?clear@?$vector@HV?$allocator@H@std@@@std@@QEAAXXZ + ?clear@?$vector@MV?$allocator@M@std@@@std@@QEAAXXZ + ?close@?$basic_filebuf@DU?$char_traits@D@std@@@std@@QEAAPEAV12@XZ + ?close@?$basic_ifstream@DU?$char_traits@D@std@@@std@@QEAAXXZ + ?compare@?$_Narrow_char_traits@DH@std@@SAHQEBD0_K@Z + ?compare@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBAHAEBV12@@Z + ?convert_to_utf8@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBV?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@2@@Z + ?convert_to_wstring@@YA?AV?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?copy@?$_Char_traits@DH@std@@SAPEADQEADQEBD_K@Z + ?copy@?$_Char_traits@_WG@std@@SAPEA_WQEA_WQEB_W_K@Z + ?data@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBAPEBDXZ + ?data@?$vector@DV?$allocator@D@std@@@std@@QEAAPEADXZ + ?data@?$vector@EV?$allocator@E@std@@@std@@QEAAPEAEXZ + ?data@?$vector@HV?$allocator@H@std@@@std@@QEAAPEAHXZ + ?data@?$vector@HV?$allocator@H@std@@@std@@QEBAPEBHXZ + ?data@?$vector@IV?$allocator@I@std@@@std@@QEAAPEAIXZ + ?data@?$vector@MV?$allocator@M@std@@@std@@QEAAPEAMXZ + ?data@?$vector@MV?$allocator@M@std@@@std@@QEBAPEBMXZ + ?data@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAAPEAUllama_token_data@@XZ + ?data@?$vector@_JV?$allocator@_J@std@@@std@@QEAAPEA_JXZ + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@HPEAX@std@@@2@QEAU?$_List_node@HPEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@2@QEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@QEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@QEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@2@QEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@2@QEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@2@QEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@QEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@QEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@2@QEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@_K@Z + ?deallocate@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@SAXAEAV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@2@QEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@D@std@@QEAAXQEAD_K@Z + ?deallocate@?$allocator@E@std@@QEAAXQEAE_K@Z + ?deallocate@?$allocator@H@std@@QEAAXQEAH_K@Z + ?deallocate@?$allocator@I@std@@QEAAXQEAI_K@Z + ?deallocate@?$allocator@M@std@@QEAAXQEAM_K@Z + ?deallocate@?$allocator@N@std@@QEAAXQEAN_K@Z + ?deallocate@?$allocator@PEAUggml_tensor@@@std@@QEAAXQEAPEAUggml_tensor@@_K@Z + ?deallocate@?$allocator@U?$_List_node@HPEAX@std@@@std@@QEAAXQEAU?$_List_node@HPEAX@2@_K@Z + ?deallocate@?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@QEAAXQEAU?$_List_node@U?$pair@$$CBHH@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@QEAAXQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@QEAAXQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@QEAAXQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@QEAAXQEAU?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@QEAAXQEAU?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@QEAAXQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@QEAAXQEAU?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@QEAAXQEAU?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@QEAAXQEAU?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@2@_K@Z + ?deallocate@?$allocator@U?$pair@NH@std@@@std@@QEAAXQEAU?$pair@NH@2@_K@Z + ?deallocate@?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@QEAAXQEAU?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@2@_K@Z + ?deallocate@?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@QEAAXQEAU_Grp_t@?$_Tgt_state_t@PEBD@2@_K@Z + ?deallocate@?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@QEAAXQEAU_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@_K@Z + ?deallocate@?$allocator@U_Loop_vals_t@std@@@std@@QEAAXQEAU_Loop_vals_t@2@_K@Z + ?deallocate@?$allocator@Udollyv2_layer@@@std@@QEAAXQEAUdollyv2_layer@@_K@Z + ?deallocate@?$allocator@Ufalcon_layer@@@std@@QEAAXQEAUfalcon_layer@@_K@Z + ?deallocate@?$allocator@Ugpt2_layer@@@std@@QEAAXQEAUgpt2_layer@@_K@Z + ?deallocate@?$allocator@Ugpt_neox_layer@@@std@@QEAAXQEAUgpt_neox_layer@@_K@Z + ?deallocate@?$allocator@Ugptj_layer@@@std@@QEAAXQEAUgptj_layer@@_K@Z + ?deallocate@?$allocator@Ullama_buffer@@@std@@QEAAXQEAUllama_buffer@@_K@Z + ?deallocate@?$allocator@Ullama_layer@@@std@@QEAAXQEAUllama_layer@@_K@Z + ?deallocate@?$allocator@Ullama_load_tensor@@@std@@QEAAXQEAUllama_load_tensor@@_K@Z + ?deallocate@?$allocator@Ullama_load_tensor_shard@@@std@@QEAAXQEAUllama_load_tensor_shard@@_K@Z + ?deallocate@?$allocator@Ullama_sp_bigram@@@std@@QEAAXQEAUllama_sp_bigram@@_K@Z + ?deallocate@?$allocator@Ullama_sp_symbol@@@std@@QEAAXQEAUllama_sp_symbol@@_K@Z + ?deallocate@?$allocator@Ullama_token_data@@@std@@QEAAXQEAUllama_token_data@@_K@Z + ?deallocate@?$allocator@Umpt_layer@@@std@@QEAAXQEAUmpt_layer@@_K@Z + ?deallocate@?$allocator@Ureplit_layer@@@std@@QEAAXQEAUreplit_layer@@_K@Z + ?deallocate@?$allocator@Ustarcoder_layer@@@std@@QEAAXQEAUstarcoder_layer@@_K@Z + ?deallocate@?$allocator@Utoken_score@llama_vocab@@@std@@QEAAXQEAUtoken_score@llama_vocab@@_K@Z + ?deallocate@?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@QEAAXQEAV?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@2@_K@Z + ?deallocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@QEAAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@_K@Z + ?deallocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@QEAAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@_K@Z + ?deallocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@QEAAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@_K@Z + ?deallocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@QEAAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@2@_K@Z + ?deallocate@?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@QEAAXQEAV?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@_K@Z + ?deallocate@?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@QEAAXQEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@_K@Z + ?deallocate@?$allocator@V?$sub_match@PEBD@std@@@std@@QEAAXQEAV?$sub_match@PEBD@2@_K@Z + ?deallocate@?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@QEAAXQEAV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@_K@Z + ?deallocate@?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@QEAAXQEAV?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@2@_K@Z + ?deallocate@?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@QEAAXQEAV?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@2@_K@Z + ?deallocate@?$allocator@Vthread@std@@@std@@QEAAXQEAVthread@2@_K@Z + ?deallocate@?$allocator@_J@std@@QEAAXQEA_J_K@Z + ?deallocate@?$allocator@_K@std@@QEAAXQEA_K_K@Z + ?deallocate@?$allocator@_W@std@@QEAAXQEA_W_K@Z + ?default_error_condition@error_category@std@@UEBA?AVerror_condition@2@H@Z + ?do_always_noconv@?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@MEBA_NXZ + ?do_compare@?$collate@D@std@@MEBAHPEBD000@Z + ?do_encoding@?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@MEBAHXZ + ?do_hash@?$collate@D@std@@MEBAJPEBD0@Z + ?do_in@?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@MEBAHAEAU_Mbstatet@@PEBD1AEAPEBDPEA_W3AEAPEA_W@Z + ?do_length@?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@MEBAHAEAU_Mbstatet@@PEBD1_K@Z + ?do_max_length@?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@MEBAHXZ + ?do_out@?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@MEBAHAEAU_Mbstatet@@PEB_W1AEAPEB_WPEAD3AEAPEAD@Z + ?do_transform@?$collate@D@std@@MEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@PEBD0@Z + ?do_unshift@?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@MEBAHAEAU_Mbstatet@@PEAD1AEAPEAD@Z + ?dollyv2_eval@@YA_NAEBUdollyv2_model@@HHAEBV?$vector@HV?$allocator@H@std@@@std@@AEAV?$vector@MV?$allocator@M@std@@@3@AEA_K@Z + ?dollyv2_model_load@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAUdollyv2_model@@AEAUgpt_vocab@@@Z + ?done_getting_tensors@falcon_model_loader@@QEBAXXZ + ?done_getting_tensors@llama_model_loader@@QEBAXXZ + ?empty@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_NXZ + ?empty@?$priority_queue@Ullama_sp_bigram@@V?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@Ucomparator@1@@std@@QEBA_NXZ + ?empty@?$vector@MV?$allocator@M@std@@@std@@QEBA_NXZ + ?empty@?$vector@NV?$allocator@N@std@@@std@@QEBA_NXZ + ?empty@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEBA_NXZ + ?empty@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEBA_NXZ + ?empty@?$vector@_JV?$allocator@_J@std@@@std@@QEBA_NXZ + ?empty@?$vector@_NV?$allocator@_N@std@@@std@@QEBA_NXZ + ?encode_word@@YA?AU?$pair@V?$vector@HV?$allocator@H@std@@@std@@M@std@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@AEBV?$unordered_map@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@2@@Z + ?end@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@XZ + ?end@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEBA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?end@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@XZ + ?end@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?end@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEBA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?end@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEAA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@2@XZ + ?end@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@QEBA?AV?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@@2@XZ + ?end@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA?AV?$_Tree_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?end@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEBA?AV?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?end@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA?AV?$_Tree_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@XZ + ?end@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA?AV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@2@XZ + ?end@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA?AV?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@2@XZ + ?end@?$initializer_list@H@std@@QEBAPEBHXZ + ?end@?$initializer_list@I@std@@QEBAPEBIXZ + ?end@?$initializer_list@PEBD@std@@QEBAPEBQEBDXZ + ?end@?$initializer_list@U?$pair@$$CBW4e_model@@_K@std@@@std@@QEBAPEBU?$pair@$$CBW4e_model@@_K@2@XZ + ?end@?$initializer_list@U?$pair@$$CBW4falcon_e_model@@_K@std@@@std@@QEBAPEBU?$pair@$$CBW4falcon_e_model@@_K@2@XZ + ?end@?$list@HV?$allocator@H@std@@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@@2@XZ + ?end@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@2@XZ + ?end@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEBA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@XZ + ?end@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@XZ + ?end@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEAA?AV?$_List_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?end@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEBA?AV?$_List_const_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@2@XZ + ?end@?$vector@HV?$allocator@H@std@@@std@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@2@XZ + ?end@?$vector@IV?$allocator@I@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@2@XZ + ?end@?$vector@MV?$allocator@M@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@M@std@@@std@@@2@XZ + ?end@?$vector@NV?$allocator@N@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@2@XZ + ?end@?$vector@NV?$allocator@N@std@@@std@@QEBA?AV?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@N@std@@@std@@@2@XZ + ?end@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@U?$pair@NH@std@@@std@@@std@@@2@XZ + ?end@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_sp_bigram@@@std@@@std@@@2@XZ + ?end@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@Ullama_token_data@@@std@@@std@@@2@XZ + ?end@?$vector@_KV?$allocator@_K@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@_K@std@@@std@@@2@XZ + ?end@?$vector@_NV?$allocator@_N@std@@@std@@QEAA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@XZ + ?eof@?$_Narrow_char_traits@DH@std@@SAHXZ + ?eq@?$_Narrow_char_traits@DH@std@@SA_NAEBD0@Z + ?eq_int_type@?$_Narrow_char_traits@DH@std@@SA_NAEBH0@Z + ?equivalent@error_category@std@@UEBA_NAEBVerror_code@2@H@Z + ?equivalent@error_category@std@@UEBA_NHAEBVerror_condition@2@@Z + ?erase@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAA?AV?$_String_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@2@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@2@0@Z + ?erase@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@_K0@Z + ?erase@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@_K@Z + ?erase@?$vector@IV?$allocator@I@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@2@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@I@std@@@std@@@2@0@Z + ?erase@?$vector@_NV?$allocator@_N@std@@@std@@QEAA?AV?$_Vb_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@V?$_Vb_const_iterator@U?$_Wrap_alloc@V?$allocator@I@std@@@std@@@2@0@Z + ?falcon_apply_lora_from_file_internal@@YAHPEAUfalcon_context@@PEBD1H@Z + ?falcon_token_cr@@YAHXZ + ?find@?$_Narrow_char_traits@DH@std@@SAPEBDQEBD_KAEBD@Z + ?find@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@QEBA?AV?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@@std@@@2@AEBH@Z + ?find@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEAA?AV?$_Tree_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?find@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEBA?AV?$_Tree_const_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@2@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?find@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEAA?AV?$_Tree_iterator@V?$_Tree_val@U?$_Tree_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@2@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?find@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_KAEBV12@_K@Z + ?find@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_KQEBD_K@Z + ?flags@?$basic_regex@DV?$regex_traits@D@std@@@std@@QEBA?AW4syntax_option_type@regex_constants@2@XZ + ?from_bytes@?$wstring_convert@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@_WV?$allocator@_W@2@V?$allocator@D@2@@std@@QEAA?AV?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@2@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?from_bytes@?$wstring_convert@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@_WV?$allocator@_W@2@V?$allocator@D@2@@std@@QEAA?AV?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@2@PEBD0@Z + ?front@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEADXZ + ?front@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEBAAEBUllama_sp_bigram@@XZ + ?generic_category@std@@YAAEBVerror_category@1@XZ + ?get@?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@QEBAPEAUfalcon_file_loader@@XZ + ?get@?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@QEBAPEAUllama_file_loader@@XZ + ?get_allocator@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA?AV?$allocator@D@2@XZ + ?get_deleter@?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@QEAAAEAU?$default_delete@Ufalcon_file_loader@@@2@XZ + ?get_deleter@?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@QEAAAEAU?$default_delete@Ullama_file_loader@@@2@XZ + ?get_deleter@?$unique_ptr@V_Node_assert@std@@U?$default_delete@V_Node_assert@std@@@2@@std@@QEAAAEAU?$default_delete@V_Node_assert@std@@@2@XZ + ?get_tensor@falcon_model_loader@@QEAAPEAUggml_tensor@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBV?$vector@IV?$allocator@I@std@@@4@W4ggml_backend@@@Z + ?get_tensor@llama_model_loader@@QEAAPEAUggml_tensor@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBV?$vector@IV?$allocator@I@std@@@4@W4ggml_backend@@@Z + ?get_tensor_for@falcon_model_loader@@QEAAPEAUggml_tensor@@AEAUllama_load_tensor@@W4ggml_backend@@@Z + ?get_tensor_for@llama_model_loader@@QEAAPEAUggml_tensor@@AEAUllama_load_tensor@@W4ggml_backend@@@Z + ?get_tensors_from_layer@@YA?AV?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@AEAUfalcon_layer@@@Z + ?gpt2_eval@@YA_NAEBUgpt2_model@@HHAEBV?$vector@HV?$allocator@H@std@@@std@@AEAV?$vector@MV?$allocator@M@std@@@3@AEA_K@Z + ?gpt2_model_load@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAUgpt2_model@@AEAUgpt_vocab@@@Z + ?gpt_neox_eval@@YA_NAEBUgpt_neox_model@@HHAEBV?$vector@HV?$allocator@H@std@@@std@@AEAV?$vector@MV?$allocator@M@std@@@3@AEA_K@Z + ?gpt_neox_ff@@YAPEAUggml_tensor@@AEBUdollyv2_layer@@PEAUggml_context@@PEAU1@@Z + ?gpt_neox_ff@@YAPEAUggml_tensor@@AEBUgpt_neox_layer@@PEAUggml_context@@PEAU1@@Z + ?gpt_neox_model_load@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAUgpt_neox_model@@AEAUgpt_vocab@@@Z + ?gpt_sample_top_k_top_p@@YAHAEBUgpt_vocab@@PEBMHNNMAEBV?$unordered_set@HU?$hash@H@std@@U?$equal_to@H@2@V?$allocator@H@2@@std@@AEAV?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@3@@Z + ?gpt_split_words@@YAXV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAV?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@@Z + ?gpt_tokenize@@YA?AV?$vector@HV?$allocator@H@std@@@std@@AEBUgpt_vocab@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?gptj_eval@@YA_NAEBUgptj_model@@HHAEBV?$vector@HV?$allocator@H@std@@@std@@AEAV?$vector@MV?$allocator@M@std@@@3@AEA_K@Z + ?gptj_model_load@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAUgptj_model@@AEAUgpt_vocab@@@Z + ?grow_to@llama_mlock@@QEAAX_K@Z + ?guess_n_parts@falcon_model_loader@@QEBAIXZ + ?guess_n_parts@llama_model_loader@@QEBAIXZ + ?hardware_concurrency@thread@std@@SAIXZ + ?imbue@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAAXAEBVlocale@2@@Z + ?infinity@?$numeric_limits@M@std@@SAMXZ + ?init@llama_mlock@@QEAAXPEAX@Z + ?insert@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@QEAAXV?$initializer_list@U?$pair@$$CBW4e_model@@_K@std@@@2@@Z + ?insert@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@QEAAXV?$initializer_list@U?$pair@$$CBW4falcon_e_model@@_K@std@@@2@@Z + ?insert@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@_KQEBD0@Z + ?insert@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAAEAV12@_KQEBD@Z + ?insert@?$vector@HV?$allocator@H@std@@@std@@QEAA?AV?$_Vector_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@2@V?$_Vector_const_iterator@V?$_Vector_val@U?$_Simple_types@H@std@@@std@@@2@$$QEAH@Z + ?isctype@?$_Regex_traits@D@std@@QEBA_NDF@Z + ?join@thread@std@@QEAAXXZ + ?joinable@thread@std@@QEBA_NXZ + ?length@?$_Narrow_char_traits@DH@std@@SA_KQEBD@Z + ?length@?$_Regex_traits@D@std@@SA_KPEBD@Z + ?length@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_KXZ + ?llama_apply_lora_from_file_internal@@YAHPEAUllama_context@@PEBD1H@Z + ?llama_internal_get_tensor_map@@YAAEAV?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@PEAUfalcon_context@@@Z + ?llama_internal_get_tensor_map@@YAAEAV?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@PEAUllama_context@@@Z + ?llama_nop@@YAXPEAUggml_tensor@@@Z + ?load_all_data@falcon_model_loader@@QEAAXP6AXMPEAX@Z0PEAUllama_mlock@@@Z + ?load_all_data@llama_model_loader@@QEAAXP6AXMPEAX@Z0PEAUllama_mlock@@@Z + ?load_data_for@falcon_model_loader@@QEAAXAEAUllama_load_tensor@@@Z + ?load_data_for@llama_model_loader@@QEAAXAEAUllama_load_tensor@@@Z + ?lock@_Mutex_base@std@@QEAAXXZ + ?lock_granularity@llama_mlock@@QEAA_KXZ + ?make_error_code@std@@YA?AVerror_code@1@W4errc@1@@Z + ?mark_count@?$basic_regex@DV?$regex_traits@D@std@@@std@@QEBAIXZ + ?max@?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@std@@SAIXZ + ?max@?$numeric_limits@D@std@@SADXZ + ?max@?$numeric_limits@E@std@@SAEXZ + ?max@?$numeric_limits@_J@std@@SA_JXZ + ?max_load_factor@?$_Hash@V?$_Umap_traits@HHV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@$0A@@std@@@std@@QEBAMXZ + ?max_load_factor@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEBAMXZ + ?max_load_factor@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEBAMXZ + ?max_load_factor@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@V?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@$0A@@std@@@std@@QEBAMXZ + ?max_load_factor@?$_Hash@V?$_Umap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_KV?$_Uhash_compare@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$hash@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@U?$equal_to@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@$0A@@std@@@std@@QEBAMXZ + ?max_load_factor@?$_Hash@V?$_Uset_traits@HV?$_Uhash_compare@HU?$hash@H@std@@U?$equal_to@H@2@@std@@V?$allocator@H@2@$0A@@std@@@std@@QEBAMXZ + ?max_size@?$_Default_allocator_traits@V?$allocator@D@std@@@std@@SA_KAEBV?$allocator@D@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@E@std@@@std@@SA_KAEBV?$allocator@E@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@H@std@@@std@@SA_KAEBV?$allocator@H@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@I@std@@@std@@SA_KAEBV?$allocator@I@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@M@std@@@std@@SA_KAEBV?$allocator@M@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@N@std@@@std@@SA_KAEBV?$allocator@N@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@PEAUggml_tensor@@@std@@@std@@SA_KAEBV?$allocator@PEAUggml_tensor@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_List_node@HPEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_List_node@HPEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_List_node@U?$pair@$$CBHH@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_List_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBW4e_model@@_K@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@std@@@std@@SA_KAEBV?$allocator@U?$_Tree_node@U?$pair@$$CBW4falcon_e_model@@_K@std@@PEAX@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$pair@NH@std@@@std@@@std@@SA_KAEBV?$allocator@U?$pair@NH@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@SA_KAEBV?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@SA_KAEBV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@U_Loop_vals_t@std@@@std@@@std@@SA_KAEBV?$allocator@U_Loop_vals_t@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Udollyv2_layer@@@std@@@std@@SA_KAEBV?$allocator@Udollyv2_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ufalcon_layer@@@std@@@std@@SA_KAEBV?$allocator@Ufalcon_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ugpt2_layer@@@std@@@std@@SA_KAEBV?$allocator@Ugpt2_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ugpt_neox_layer@@@std@@@std@@SA_KAEBV?$allocator@Ugpt_neox_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ugptj_layer@@@std@@@std@@SA_KAEBV?$allocator@Ugptj_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ullama_buffer@@@std@@@std@@SA_KAEBV?$allocator@Ullama_buffer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ullama_layer@@@std@@@std@@SA_KAEBV?$allocator@Ullama_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ullama_load_tensor@@@std@@@std@@SA_KAEBV?$allocator@Ullama_load_tensor@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@SA_KAEBV?$allocator@Ullama_load_tensor_shard@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ullama_sp_bigram@@@std@@@std@@SA_KAEBV?$allocator@Ullama_sp_bigram@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ullama_sp_symbol@@@std@@@std@@SA_KAEBV?$allocator@Ullama_sp_symbol@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ullama_token_data@@@std@@@std@@SA_KAEBV?$allocator@Ullama_token_data@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Umpt_layer@@@std@@@std@@SA_KAEBV?$allocator@Umpt_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ureplit_layer@@@std@@@std@@SA_KAEBV?$allocator@Ureplit_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Ustarcoder_layer@@@std@@@std@@SA_KAEBV?$allocator@Ustarcoder_layer@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@SA_KAEBV?$allocator@Utoken_score@llama_vocab@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@SA_KAEBV?$allocator@V?$sub_match@PEBD@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@std@@@std@@SA_KAEBV?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@Vthread@std@@@std@@@std@@SA_KAEBV?$allocator@Vthread@std@@@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@_J@std@@@std@@SA_KAEBV?$allocator@_J@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@_K@std@@@std@@SA_KAEBV?$allocator@_K@2@@Z + ?max_size@?$_Default_allocator_traits@V?$allocator@_W@std@@@std@@SA_KAEBV?$allocator@_W@2@@Z + ?max_size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEBA_KXZ + ?max_size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?max_size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?max_size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?max_size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?max_size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?max_size@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?max_size@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@HU?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?max_size@?$_Tree@V?$_Tmap_traits@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@U?$less@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?max_size@?$_Tree@V?$_Tmap_traits@W4e_model@@_KU?$less@W4e_model@@@std@@V?$allocator@U?$pair@$$CBW4e_model@@_K@std@@@3@$0A@@std@@@std@@QEBA_KXZ + ?max_size@?$_Tree@V?$_Tmap_traits@W4falcon_e_model@@_KU?$less@W4falcon_e_model@@@std@@V?$allocator@U?$pair@$$CBW4falcon_e_model@@_K@std@@@3@$0A@@std@@@std@@QEBA_KXZ + ?max_size@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_KXZ + ?max_size@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEBA_KXZ + ?max_size@?$list@HV?$allocator@H@std@@@std@@QEBA_KXZ + ?max_size@?$list@U?$pair@$$CBHH@std@@V?$allocator@U?$pair@$$CBHH@std@@@2@@std@@QEBA_KXZ + ?max_size@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@2@@std@@QEBA_KXZ + ?max_size@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEBA_KXZ + ?max_size@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@2@@std@@QEBA_KXZ + ?max_size@?$list@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@V?$allocator@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@DV?$allocator@D@std@@@std@@QEBA_KXZ + ?max_size@?$vector@EV?$allocator@E@std@@@std@@QEBA_KXZ + ?max_size@?$vector@HV?$allocator@H@std@@@std@@QEBA_KXZ + ?max_size@?$vector@IV?$allocator@I@std@@@std@@QEBA_KXZ + ?max_size@?$vector@MV?$allocator@M@std@@@std@@QEBA_KXZ + ?max_size@?$vector@NV?$allocator@N@std@@@std@@QEBA_KXZ + ?max_size@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@V?$allocator@U?$pair@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEBA_KXZ + ?max_size@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEBA_KXZ + ?max_size@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ullama_buffer@@V?$allocator@Ullama_buffer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEBA_KXZ + ?max_size@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@V?$allocator@V?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEBA_KXZ + ?max_size@?$vector@_JV?$allocator@_J@std@@@std@@QEBA_KXZ + ?max_size@?$vector@_KV?$allocator@_K@std@@@std@@QEBA_KXZ + ?max_size@?$vector@_NV?$allocator@_N@std@@@std@@QEBA_KXZ + ?message@_Generic_error_category@std@@UEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@H@Z + ?message@error_code@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?min@?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@std@@SAIXZ + ?move@?$_Char_traits@DH@std@@SAPEADQEADQEBD_K@Z + ?move@?$_Char_traits@_WG@std@@SAPEA_WQEA_WQEB_W_K@Z + ?mpt_eval@@YA_NAEBUmpt_model@@HHAEBV?$vector@HV?$allocator@H@std@@@std@@AEAV?$vector@MV?$allocator@M@std@@@3@AEA_K@Z + ?mpt_model_load@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAUmpt_model@@AEAUgpt_vocab@@@Z + ?name@_Generic_error_category@std@@UEBAPEBDXZ + ?name@locale@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?not_eof@?$_Narrow_char_traits@DH@std@@SAHAEBH@Z + ?open@?$basic_filebuf@DU?$char_traits@D@std@@@std@@QEAAPEAV12@PEBDHH@Z + ?overflow@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAAHH@Z + ?overflow@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@MEAAHH@Z + ?pbackfail@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAAHH@Z + ?pbackfail@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@MEAAHH@Z + ?pointer_to@?$pointer_traits@PEAD@std@@SAPEADAEAD@Z + ?pointer_to@?$pointer_traits@PEAU?$pair@$$CBHH@std@@@std@@SAPEAU?$pair@$$CBHH@2@AEAU32@@Z + ?pointer_to@?$pointer_traits@PEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@SAPEAU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@2@AEAU32@@Z + ?pointer_to@?$pointer_traits@PEBU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@std@@SAPEBU?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@AEBU32@@Z + ?pointer_to@?$pointer_traits@PEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@SAPEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@2@AEBU32@@Z + ?pointer_to@?$pointer_traits@PEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@SAPEBU?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@2@AEBU32@@Z + ?pop@?$priority_queue@Ullama_sp_bigram@@V?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@Ucomparator@1@@std@@QEAAXXZ + ?pop_back@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAAXXZ + ?prefix@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEBAAEBV?$sub_match@PEBD@2@XZ + ?prefix@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEBAAEBV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?print_checksum@falcon_model_loader@@SAXAEAUllama_load_tensor@@@Z + ?print_checksum@llama_model_loader@@SAXAEAUllama_load_tensor@@@Z + ?push@?$priority_queue@Ullama_sp_bigram@@V?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@Ucomparator@1@@std@@QEAAXAEBUllama_sp_bigram@@@Z + ?push_back@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAXD@Z + ?push_back@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEAAX_W@Z + ?push_back@?$vector@HV?$allocator@H@std@@@std@@QEAAX$$QEAH@Z + ?push_back@?$vector@HV?$allocator@H@std@@@std@@QEAAXAEBH@Z + ?push_back@?$vector@MV?$allocator@M@std@@@std@@QEAAXAEBM@Z + ?push_back@?$vector@NV?$allocator@N@std@@@std@@QEAAX$$QEAN@Z + ?push_back@?$vector@NV?$allocator@N@std@@@std@@QEAAXAEBN@Z + ?push_back@?$vector@PEAUggml_tensor@@V?$allocator@PEAUggml_tensor@@@std@@@std@@QEAAXAEBQEAUggml_tensor@@@Z + ?push_back@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAAX$$QEAU?$pair@NH@2@@Z + ?push_back@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEAAXAEBUllama_load_tensor_shard@@@Z + ?push_back@?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@QEAAXAEBUllama_sp_bigram@@@Z + ?push_back@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAAXAEBUllama_token_data@@@Z + ?push_back@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAAX$$QEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?push_back@?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@QEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?push_back@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAAX$$QEAVthread@2@@Z + ?raw_lock@llama_mlock@@QEAA_NPEAX_K@Z + ?raw_unlock@llama_mlock@@QEAAXPEAX_K@Z + ?read_hparams@falcon_file_loader@@QEAAXXZ + ?read_hparams@llama_file_loader@@QEAAXXZ + ?read_magic@falcon_file_loader@@QEAAXXZ + ?read_magic@llama_file_loader@@QEAAXXZ + ?read_raw@llama_file@@QEBAXPEAX_K@Z + ?read_string@llama_file@@QEAA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@I@Z + ?read_tensor_metadata@falcon_file_loader@@QEAAX_KAEAUllama_load_tensors_map@@@Z + ?read_tensor_metadata@llama_file_loader@@QEAAX_KAEAUllama_load_tensors_map@@@Z + ?read_u32@llama_file@@QEAAIXZ + ?read_vocab@falcon_file_loader@@QEAAXXZ + ?read_vocab@llama_file_loader@@QEAAXXZ + ?release@?$unique_ptr@Ufalcon_file_loader@@U?$default_delete@Ufalcon_file_loader@@@std@@@std@@QEAAPEAUfalcon_file_loader@@XZ + ?release@?$unique_ptr@Ullama_file_loader@@U?$default_delete@Ullama_file_loader@@@std@@@std@@QEAAPEAUllama_file_loader@@XZ + ?release@?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@std@@QEAAPEAUllama_mmap@@XZ + ?release@?$unique_ptr@V_Facet_base@std@@U?$default_delete@V_Facet_base@std@@@2@@std@@QEAAPEAV_Facet_base@2@XZ + ?release@?$unique_ptr@V_Node_assert@std@@U?$default_delete@V_Node_assert@std@@@2@@std@@QEAAPEAV_Node_assert@2@XZ + ?replace_all@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEBV12@00@Z + ?replit_eval@@YA_NAEBUreplit_model@@HHAEBV?$vector@HV?$allocator@H@std@@@std@@AEAV?$vector@MV?$allocator@M@std@@@3@AEA_K@Z + ?replit_model_load@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAUreplit_model@@AEAUreplit_tokenizer@@@Z + ?replit_tokenizer_load@@YA_NAEAUreplit_tokenizer@@AEAV?$basic_istream@DU?$char_traits@D@std@@@std@@H@Z + ?replit_tokenizer_tokenize@@YA?AV?$vector@HV?$allocator@H@std@@@std@@AEBUreplit_tokenizer@@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?reserve@?$vector@MV?$allocator@M@std@@@std@@QEAAX_K@Z + ?reserve@?$vector@NV?$allocator@N@std@@@std@@QEAAX_K@Z + ?reserve@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAAX_K@Z + ?reserve@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEAAX_K@Z + ?reset@?$unique_ptr@Ufalcon_model_loader@@U?$default_delete@Ufalcon_model_loader@@@std@@@std@@QEAAXPEAUfalcon_model_loader@@@Z + ?reset@?$unique_ptr@Ullama_mmap@@U?$default_delete@Ullama_mmap@@@std@@@std@@QEAAXPEAUllama_mmap@@@Z + ?reset@?$unique_ptr@Ullama_model_loader@@U?$default_delete@Ullama_model_loader@@@std@@@std@@QEAAXPEAUllama_model_loader@@@Z + ?resize@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAX_KD@Z + ?resize@?$vector@DV?$allocator@D@std@@@std@@QEAAX_K@Z + ?resize@?$vector@IV?$allocator@I@std@@@std@@QEAAX_K@Z + ?resize@?$vector@IV?$allocator@I@std@@@std@@QEAAX_KAEBI@Z + ?resize@?$vector@MV?$allocator@M@std@@@std@@QEAAX_K@Z + ?resize@?$vector@NV?$allocator@N@std@@@std@@QEAAX_K@Z + ?resize@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEAAX_K@Z + ?resize@?$vector@U_Grp_t@?$_Tgt_state_t@PEBD@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@3@@std@@QEAAX_K@Z + ?resize@?$vector@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@3@@std@@QEAAX_K@Z + ?resize@?$vector@U_Loop_vals_t@std@@V?$allocator@U_Loop_vals_t@std@@@2@@std@@QEAAX_K@Z + ?resize@?$vector@Udollyv2_layer@@V?$allocator@Udollyv2_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Ufalcon_layer@@V?$allocator@Ufalcon_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Ugpt2_layer@@V?$allocator@Ugpt2_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Ugpt_neox_layer@@V?$allocator@Ugpt_neox_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Ugptj_layer@@V?$allocator@Ugptj_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Ullama_layer@@V?$allocator@Ullama_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Umpt_layer@@V?$allocator@Umpt_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Ureplit_layer@@V?$allocator@Ureplit_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Ustarcoder_layer@@V?$allocator@Ustarcoder_layer@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEAAX_K@Z + ?resize@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@QEAAX_K@Z + ?resize@?$vector@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEAAX_K@Z + ?resize@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEAAX_K@Z + ?resize@?$vector@_JV?$allocator@_J@std@@@std@@QEAAX_KAEB_J@Z + ?resize@?$vector@_NV?$allocator@_N@std@@@std@@QEAAX_K_N@Z + ?resize@llama_buffer@@QEAAX_K@Z + ?rfind@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_KAEBV12@_K@Z + ?rfind@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_KQEBD_K@Z + ?seed@?$mersenne_twister@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@@std@@QEAAXII@Z + ?seed@?$mersenne_twister_engine@I$0CA@$0CHA@$0BIN@$0BP@$0JJAILANP@$0L@$0PPPPPPPP@$06$0JNCMFGIA@$0P@$0OPMGAAAA@$0BC@$0GMAHIJGF@@std@@QEAAXI@Z + ?seek@llama_file@@QEAAX_KH@Z + ?seekoff@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAA?AV?$fpos@U_Mbstatet@@@2@_JHH@Z + ?seekoff@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@MEAA?AV?$fpos@U_Mbstatet@@@2@_JHH@Z + ?seekpos@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAA?AV?$fpos@U_Mbstatet@@@2@V32@H@Z + ?seekpos@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@MEAA?AV?$fpos@U_Mbstatet@@@2@V32@H@Z + ?select_on_container_copy_construction@?$_Default_allocator_traits@V?$allocator@D@std@@@std@@SA?AV?$allocator@D@2@AEBV32@@Z + ?select_on_container_copy_construction@?$_Default_allocator_traits@V?$allocator@H@std@@@std@@SA?AV?$allocator@H@2@AEBV32@@Z + ?select_on_container_copy_construction@?$_Default_allocator_traits@V?$allocator@I@std@@@std@@SA?AV?$allocator@I@2@AEBV32@@Z + ?select_on_container_copy_construction@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@std@@@std@@SA?AV?$allocator@U_Grp_t@?$_Tgt_state_t@PEBD@std@@@2@AEBV32@@Z + ?select_on_container_copy_construction@?$_Default_allocator_traits@V?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@std@@@std@@SA?AV?$allocator@U_Grp_t@?$_Tgt_state_t@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@AEBV32@@Z + ?select_on_container_copy_construction@?$_Default_allocator_traits@V?$allocator@_W@std@@@std@@SA?AV?$allocator@_W@2@AEBV32@@Z + ?setbuf@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAAPEAV?$basic_streambuf@DU?$char_traits@D@std@@@2@PEAD_J@Z + ?size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_const_iterator@V?$_List_val@U?$_List_simple_types@H@std@@@std@@U_Iterator_base0@2@@std@@@std@@@std@@QEBA_KXZ + ?size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBHH@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAUggml_tensor@@@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$pair@_KM@2@@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?size@?$_Hash_vec@V?$allocator@V?$_List_unchecked_iterator@V?$_List_val@U?$_List_simple_types@U?$pair@$$CBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_K@std@@@std@@@std@@@std@@@std@@@std@@QEBA_KXZ + ?size@?$_Tree@V?$_Tmap_traits@HV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@U?$less@H@2@V?$allocator@U?$pair@$$CBHV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@std@@@2@$0A@@std@@@std@@QEBA_KXZ + ?size@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA_KXZ + ?size@?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@QEBA_KXZ + ?size@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEBA_KXZ + ?size@?$vector@DV?$allocator@D@std@@@std@@QEBA_KXZ + ?size@?$vector@EV?$allocator@E@std@@@std@@QEBA_KXZ + ?size@?$vector@HV?$allocator@H@std@@@std@@QEBA_KXZ + ?size@?$vector@IV?$allocator@I@std@@@std@@QEBA_KXZ + ?size@?$vector@MV?$allocator@M@std@@@std@@QEBA_KXZ + ?size@?$vector@NV?$allocator@N@std@@@std@@QEBA_KXZ + ?size@?$vector@U?$pair@NH@std@@V?$allocator@U?$pair@NH@std@@@2@@std@@QEBA_KXZ + ?size@?$vector@Ullama_load_tensor@@V?$allocator@Ullama_load_tensor@@@std@@@std@@QEBA_KXZ + ?size@?$vector@Ullama_load_tensor_shard@@V?$allocator@Ullama_load_tensor_shard@@@std@@@std@@QEBA_KXZ + ?size@?$vector@Ullama_sp_symbol@@V?$allocator@Ullama_sp_symbol@@@std@@@std@@QEBA_KXZ + ?size@?$vector@Ullama_token_data@@V?$allocator@Ullama_token_data@@@std@@@std@@QEBA_KXZ + ?size@?$vector@Utoken_score@llama_vocab@@V?$allocator@Utoken_score@llama_vocab@@@std@@@std@@QEBA_KXZ + ?size@?$vector@V?$sub_match@PEBD@std@@V?$allocator@V?$sub_match@PEBD@std@@@2@@std@@QEBA_KXZ + ?size@?$vector@Vthread@std@@V?$allocator@Vthread@std@@@2@@std@@QEBA_KXZ + ?size@?$vector@_JV?$allocator@_J@std@@@std@@QEBA_KXZ + ?size@?$vector@_KV?$allocator@_K@std@@@std@@QEBA_KXZ + ?size@?$vector@_NV?$allocator@_N@std@@@std@@QEBA_KXZ + ?sqrt@@YAMM@Z + ?starcoder_eval@@YA_NAEBUstarcoder_model@@HHAEBV?$vector@HV?$allocator@H@std@@@std@@AEAV?$vector@MV?$allocator@M@std@@@3@AEA_K@Z + ?starcoder_model_load@@YA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAUstarcoder_model@@AEAUgpt_vocab@@@Z + ?state@?$fpos@U_Mbstatet@@@std@@QEBA?AU_Mbstatet@@XZ + ?str@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?str@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?str@?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@@Z + ?str@?$basic_stringstream@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?str@?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@XZ + ?substr@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEBA?AV12@_K0@Z + ?suffix@?$match_results@PEBDV?$allocator@V?$sub_match@PEBD@std@@@std@@@std@@QEBAAEBV?$sub_match@PEBD@2@XZ + ?suffix@?$match_results@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@V?$allocator@V?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@std@@@2@@std@@QEBAAEBV?$sub_match@V?$_String_const_iterator@V?$_String_val@U?$_Simple_types@D@std@@@std@@@std@@@2@XZ + ?sync@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAAHXZ + ?tell@llama_file@@QEBA_KXZ + ?to_bytes@?$wstring_convert@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@_WV?$allocator@_W@2@V?$allocator@D@2@@std@@QEAA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@AEBV?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@2@@Z + ?to_bytes@?$wstring_convert@V?$codecvt_utf8@_W$0BAPPPP@$0A@@std@@_WV?$allocator@_W@2@V?$allocator@D@2@@std@@QEAA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@PEB_W0@Z + ?to_char_type@?$_Narrow_char_traits@DH@std@@SADAEBH@Z + ?to_int_type@?$_Narrow_char_traits@DH@std@@SAHAEBD@Z + ?to_string@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@H@Z + ?to_string@std@@YA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@1@I@Z + ?tokenize@llama_tokenizer@@QEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@AEAV?$vector@HV?$allocator@H@std@@@3@@Z + ?top@?$priority_queue@Ullama_sp_bigram@@V?$vector@Ullama_sp_bigram@@V?$allocator@Ullama_sp_bigram@@@std@@@std@@Ucomparator@1@@std@@QEBAAEBUllama_sp_bigram@@XZ + ?transform@?$collate@D@std@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@2@PEBD0@Z + ?translate@?$_Regex_traits@D@std@@QEBADD@Z + ?translate_nocase@?$_Regex_traits@D@std@@QEBADD@Z + ?try_add_bigram@llama_tokenizer@@AEAAXHH@Z + ?uflow@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAAHXZ + ?underflow@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAAHXZ + ?underflow@?$basic_stringbuf@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@MEAAHXZ + ?unlock@?$unique_lock@Vmutex@std@@@std@@QEAAXXZ + ?unlock@_Mutex_base@std@@QEAAXXZ + ?use_buf@falcon_context@@QEAAXPEAUggml_context@@H@Z + ?use_buf@llama_context@@QEAAXPEAUggml_context@@H@Z + ?value@?$regex_traits@D@std@@QEBAHDH@Z + ?value@error_code@std@@QEBAHXZ + ?value@error_condition@std@@QEBAHXZ + ?what@exception@std@@UEBAPEBDXZ + ?write_hparams@falcon_file_saver@@QEAAXW4llama_ftype@@@Z + ?write_hparams@llama_file_saver@@QEAAXW4llama_ftype@@@Z + ?write_magic@falcon_file_saver@@QEAAXXZ + ?write_magic@llama_file_saver@@QEAAXXZ + ?write_raw@llama_file@@QEBAXPEBX_K@Z + ?write_tensor@falcon_file_saver@@QEAAXAEAUllama_load_tensor@@W4ggml_type@@PEBX_K@Z + ?write_tensor@llama_file_saver@@QEAAXAEAUllama_load_tensor@@W4ggml_type@@PEBX_K@Z + ?write_u32@llama_file@@QEAAXI@Z + ?write_vocab@falcon_file_saver@@QEAAXXZ + ?write_vocab@llama_file_saver@@QEAAXXZ + ?xsgetn@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAA_JPEAD_J@Z + ?xsputn@?$basic_filebuf@DU?$char_traits@D@std@@@std@@MEAA_JPEBD_J@Z + __local_stdio_printf_options + _vfprintf_l + ctransformers_llm_batch_eval + ctransformers_llm_context_length + ctransformers_llm_create + ctransformers_llm_delete + ctransformers_llm_detokenize + ctransformers_llm_embeddings_data + ctransformers_llm_embeddings_size + ctransformers_llm_eos_token_id + ctransformers_llm_is_eos_token + ctransformers_llm_logits_data + ctransformers_llm_logits_size + ctransformers_llm_reset + ctransformers_llm_sample + ctransformers_llm_tokenize + ctransformers_llm_vocab_size + dequantize_row_q2_K + dequantize_row_q3_K + dequantize_row_q4_K + dequantize_row_q5_K + dequantize_row_q6_K + dequantize_row_q8_K + fabsf + falcon_apply_lora_from_file + falcon_context_default_params + falcon_copy_state_data + falcon_eval + falcon_eval_export + falcon_free + falcon_get_embeddings + falcon_get_kv_cache_token_count + falcon_get_logits + falcon_get_state_size + falcon_get_vocab + falcon_init_from_file + falcon_load_session_file + falcon_model_quantize + falcon_model_quantize_default_params + falcon_n_ctx + falcon_n_embd + falcon_n_vocab + falcon_print_system_info + falcon_print_timings + falcon_reset_timings + falcon_sample_frequency_and_presence_penalties + falcon_sample_repetition_penalty + falcon_sample_softmax + falcon_sample_tail_free + falcon_sample_temperature + falcon_sample_token + falcon_sample_token_greedy + falcon_sample_token_mirostat + falcon_sample_token_mirostat_v2 + falcon_sample_top_k + falcon_sample_top_p + falcon_sample_typical + falcon_save_session_file + falcon_set_rng_seed + falcon_set_state_data + falcon_token_bos + falcon_token_eos + falcon_token_nl + falcon_token_to_str + falcon_tokenize + fprintf + ggml_abs + ggml_abs_impl + ggml_abs_inplace + ggml_acc + ggml_acc_impl + ggml_acc_inplace + ggml_add + ggml_add1 + ggml_add1_impl + ggml_add1_inplace + ggml_add_impl + ggml_add_inplace + ggml_alibi + ggml_blck_size + ggml_build_backward + ggml_build_forward + ggml_build_forward_expand + ggml_clamp + ggml_cont + ggml_cont_impl + ggml_cont_inplace + ggml_conv_1d_1s + ggml_conv_1d_2s + ggml_cpu_has_arm_fma + ggml_cpu_has_avx + ggml_cpu_has_avx2 + ggml_cpu_has_avx512 + ggml_cpu_has_avx512_vbmi + ggml_cpu_has_avx512_vnni + ggml_cpu_has_blas + ggml_cpu_has_clblast + ggml_cpu_has_cublas + ggml_cpu_has_f16c + ggml_cpu_has_fma + ggml_cpu_has_fp16_va + ggml_cpu_has_gpublas + ggml_cpu_has_neon + ggml_cpu_has_sse3 + ggml_cpu_has_vsx + ggml_cpu_has_wasm_simd + ggml_cpy + ggml_cpy_impl + ggml_cpy_inplace + ggml_cross_entropy_loss + ggml_cross_entropy_loss_back + ggml_cycles + ggml_cycles_per_ms + ggml_diag + ggml_diag_mask_inf + ggml_diag_mask_inf_impl + ggml_diag_mask_inf_inplace + ggml_diag_mask_zero + ggml_diag_mask_zero_impl + ggml_diag_mask_zero_inplace + ggml_div + ggml_div_impl + ggml_div_inplace + ggml_dup + ggml_dup_impl + ggml_dup_inplace + ggml_dup_tensor + ggml_element_size + ggml_flash_attn + ggml_flash_attn_back + ggml_flash_ff + ggml_fp16_to_fp32 + ggml_fp16_to_fp32_row + ggml_fp32_to_fp16 + ggml_fp32_to_fp16_row + ggml_free + ggml_ftype_to_ggml_type + ggml_gelu + ggml_gelu_impl + ggml_gelu_inplace + ggml_get_data + ggml_get_data_f32 + ggml_get_f32_1d + ggml_get_i32_1d + ggml_get_max_tensor_size + ggml_get_mem_buffer + ggml_get_mem_size + ggml_get_name + ggml_get_rows + ggml_get_rows_back + ggml_get_tensor + ggml_graph_compute + ggml_graph_dump_dot + ggml_graph_export + ggml_graph_get_tensor + ggml_graph_import + ggml_graph_print + ggml_graph_reset + ggml_init + ggml_internal_get_quantize_fn + ggml_is_contiguous + ggml_is_permuted + ggml_is_quantized + ggml_is_transposed + ggml_log + ggml_log_impl + ggml_log_inplace + ggml_map_binary_f32 + ggml_map_binary_impl_f32 + ggml_map_binary_inplace_f32 + ggml_map_unary_f32 + ggml_map_unary_impl_f32 + ggml_map_unary_inplace_f32 + ggml_mean + ggml_mul + ggml_mul_impl + ggml_mul_inplace + ggml_mul_mat + ggml_nbytes + ggml_nbytes_split + ggml_neg + ggml_neg_impl + ggml_neg_inplace + ggml_nelements + ggml_new_f32 + ggml_new_i32 + ggml_new_tensor + ggml_new_tensor_1d + ggml_new_tensor_2d + ggml_new_tensor_3d + ggml_new_tensor_4d + ggml_new_tensor_impl + ggml_norm + ggml_norm_impl + ggml_norm_inplace + ggml_nrows + ggml_op_name + ggml_opt + ggml_opt_default_params + ggml_opt_init + ggml_opt_resume + ggml_opt_resume_g + ggml_out_prod + ggml_permute + ggml_print_object + ggml_print_objects + ggml_quantize_chunk + ggml_quantize_q2_K + ggml_quantize_q3_K + ggml_quantize_q4_0 + ggml_quantize_q4_1 + ggml_quantize_q4_K + ggml_quantize_q5_0 + ggml_quantize_q5_1 + ggml_quantize_q5_K + ggml_quantize_q6_K + ggml_quantize_q8_0 + ggml_relu + ggml_relu_impl + ggml_relu_inplace + ggml_repeat + ggml_repeat2 + ggml_repeat_back + ggml_reshape + ggml_reshape_1d + ggml_reshape_2d + ggml_reshape_3d + ggml_reshape_4d + ggml_rms_norm + ggml_rms_norm_back + ggml_rms_norm_impl + ggml_rms_norm_inplace + ggml_rope + ggml_rope_back + ggml_rope_impl + ggml_rope_inplace + ggml_scale + ggml_scale_impl + ggml_scale_inplace + ggml_scratch_load + ggml_scratch_save + ggml_set + ggml_set_1d + ggml_set_1d_inplace + ggml_set_2d + ggml_set_2d_inplace + ggml_set_f32 + ggml_set_f32_1d + ggml_set_i32 + ggml_set_i32_1d + ggml_set_impl + ggml_set_inplace + ggml_set_name + ggml_set_no_alloc + ggml_set_param + ggml_set_scratch + ggml_set_zero + ggml_sgn + ggml_sgn_impl + ggml_sgn_inplace + ggml_silu + ggml_silu_back + ggml_silu_impl + ggml_silu_inplace + ggml_soft_max + ggml_soft_max_back + ggml_soft_max_back_impl + ggml_soft_max_back_inplace + ggml_soft_max_impl + ggml_soft_max_inplace + ggml_sqr + ggml_sqr_impl + ggml_sqr_inplace + ggml_sqrt + ggml_sqrt_impl + ggml_sqrt_inplace + ggml_step + ggml_step_impl + ggml_step_inplace + ggml_sub + ggml_sub_impl + ggml_sub_inplace + ggml_sum + ggml_sum_rows + ggml_tensor_overhead + ggml_time_init + ggml_time_ms + ggml_time_us + ggml_transpose + ggml_type_name + ggml_type_size + ggml_type_sizef + ggml_used_mem + ggml_vec_dot_q2_K_q8_K + ggml_vec_dot_q3_K_q8_K + ggml_vec_dot_q4_K_q8_K + ggml_vec_dot_q5_K_q8_K + ggml_vec_dot_q6_K_q8_K + ggml_view_1d + ggml_view_2d + ggml_view_3d + ggml_view_4d + ggml_view_tensor + llama_apply_lora_from_file + llama_context_default_params + llama_copy_state_data + llama_eval + llama_eval_export + llama_free + llama_get_embeddings + llama_get_kv_cache_token_count + llama_get_logits + llama_get_state_size + llama_get_vocab + llama_init_backend + llama_init_from_file + llama_load_session_file + llama_mlock_supported + llama_mmap_supported + llama_model_quantize + llama_model_quantize_default_params + llama_n_ctx + llama_n_embd + llama_n_vocab + llama_print_system_info + llama_print_timings + llama_reset_timings + llama_sample_frequency_and_presence_penalties + llama_sample_repetition_penalty + llama_sample_softmax + llama_sample_tail_free + llama_sample_temperature + llama_sample_token + llama_sample_token_greedy + llama_sample_token_mirostat + llama_sample_token_mirostat_v2 + llama_sample_top_k + llama_sample_top_p + llama_sample_typical + llama_save_session_file + llama_set_rng_seed + llama_set_state_data + llama_time_us + llama_token_bos + llama_token_eos + llama_token_nl + llama_token_to_str + llama_tokenize + printf + quantize_row_q2_K + quantize_row_q2_K_reference + quantize_row_q3_K + quantize_row_q3_K_reference + quantize_row_q4_K + quantize_row_q4_K_reference + quantize_row_q5_K + quantize_row_q5_K_reference + quantize_row_q6_K + quantize_row_q6_K_reference + quantize_row_q8_K + quantize_row_q8_K_reference + snprintf + vsnprintf + wmemcpy + wmemset diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/exports.def.objs b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/exports.def.objs new file mode 100644 index 0000000000000000000000000000000000000000..459f8af64fc5d74313c66c1b3536836a34e12677 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/exports.def.objs @@ -0,0 +1,3 @@ +CMakeFiles\ctransformers.dir\models\llm.cc.obj +CMakeFiles\ctransformers.dir\models\ggml\ggml.c.obj +CMakeFiles\ctransformers.dir\models\ggml\k_quants.c.obj diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/ggml/ggml.c.obj b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/ggml/ggml.c.obj new file mode 100644 index 0000000000000000000000000000000000000000..b7370c9a34d8fcef86ed8c344b273bb0b3d90d98 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/ggml/ggml.c.obj differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/ggml/k_quants.c.obj b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/ggml/k_quants.c.obj new file mode 100644 index 0000000000000000000000000000000000000000..1061fa49f42a1cec882ad90ba64acd99d5a63862 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/ggml/k_quants.c.obj differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/llm.cc.obj b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/llm.cc.obj new file mode 100644 index 0000000000000000000000000000000000000000..08e7796152a59b878f653762e9eafb18f1f189d9 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/ctransformers.dir/models/llm.cc.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ed433485918bb8c758685ebea3a642015160a323f6a217ec50c9c35a5da560 +size 4429921 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/rules.ninja b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/rules.ninja new file mode 100644 index 0000000000000000000000000000000000000000..4df9e7a0ca33985698b367cef90283542d4ea814 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeFiles/rules.ninja @@ -0,0 +1,78 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Ninja" Generator, CMake Version 3.26 + +# This file contains all the rules used to get the outputs files +# built from the input files. +# It is included in the main 'build.ninja'. + +# ============================================================================= +# Project: ctransformers +# Configurations: Release +# ============================================================================= +# ============================================================================= + +############################################# +# localized /showIncludes string + +msvc_deps_prefix = Note: including file: + + +############################################# +# Rule for compiling C files. + +rule C_COMPILER__ctransformers_unscanned_Release + deps = msvc + command = C:\PROGRA~2\MICROS~3\2019\BUILDT~1\VC\Tools\MSVC\1429~1.301\bin\Hostx86\x64\cl.exe /nologo $DEFINES $INCLUDES $FLAGS /showIncludes /Fo$out /Fd$TARGET_COMPILE_PDB /FS -c $in + description = Building C object $out + + +############################################# +# Rule for compiling CXX files. + +rule CXX_COMPILER__ctransformers_unscanned_Release + deps = msvc + command = C:\PROGRA~2\MICROS~3\2019\BUILDT~1\VC\Tools\MSVC\1429~1.301\bin\Hostx86\x64\cl.exe /nologo /TP $DEFINES $INCLUDES $FLAGS /showIncludes /Fo$out /Fd$TARGET_COMPILE_PDB /FS -c $in + description = Building CXX object $out + + +############################################# +# Rule for linking CXX shared library. + +rule CXX_SHARED_LIBRARY_LINKER__ctransformers_Release + command = cmd.exe /C "$PRE_LINK && C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\bin\cmake.exe -E vs_link_dll --intdir=$OBJECT_DIR --rc=C:\PROGRA~2\WI3CF2~1\10\bin\100190~1.0\x86\rc.exe --mt=C:\PROGRA~2\WI3CF2~1\10\bin\100190~1.0\x86\mt.exe --manifests $MANIFESTS -- C:\PROGRA~2\MICROS~3\2019\BUILDT~1\VC\Tools\MSVC\1429~1.301\bin\Hostx86\x64\link.exe /nologo $in /out:$TARGET_FILE /implib:$TARGET_IMPLIB /pdb:$TARGET_PDB /dll /version:0.0 $LINK_FLAGS $LINK_PATH $LINK_LIBRARIES && $POST_BUILD" + description = Linking CXX shared library $TARGET_FILE + restat = $RESTAT + + +############################################# +# Rule for running custom commands. + +rule CUSTOM_COMMAND + command = $COMMAND + description = $DESC + + +############################################# +# Rule for re-running cmake. + +rule RERUN_CMAKE + command = C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\bin\cmake.exe --regenerate-during-build -S"D:\Projects\Replit 3b\replit-3B-inference\ctransformers" -B"D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build" + description = Re-running CMake... + generator = 1 + + +############################################# +# Rule for cleaning all built files. + +rule CLEAN + command = C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\ninja\data\bin\ninja $FILE_ARG -t clean $TARGETS + description = Cleaning all built files... + + +############################################# +# Rule for printing all primary targets available. + +rule HELP + command = C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\ninja\data\bin\ninja -t targets + description = All primary targets available: + diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeSpec.json b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeSpec.json new file mode 100644 index 0000000000000000000000000000000000000000..43c90a36e43dfdf7f2decebae6522ad7c87dceed --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/CMakeSpec.json @@ -0,0 +1 @@ +{"args": ["C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\overlay\\Lib\\site-packages\\cmake\\data\\bin/cmake.exe", "-DCMAKE_BUILD_TYPE:STRING=Release"], "version": "3.26.4", "environment": {"PYTHONNOUSERSITE": "1", "PYTHONPATH": "C:\\Users\\milin\\AppData\\Local\\Temp\\pip-build-env-nufd4ytl\\site"}} \ No newline at end of file diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/build.ninja b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/build.ninja new file mode 100644 index 0000000000000000000000000000000000000000..a451803928e9e44c0a9c2a55ffb1a10b8eb8bbcd --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/build.ninja @@ -0,0 +1,203 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Ninja" Generator, CMake Version 3.26 + +# This file contains all the build statements describing the +# compilation DAG. + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# +# Which is the root file. +# ============================================================================= + +# ============================================================================= +# Project: ctransformers +# Configurations: Release +# ============================================================================= + +############################################# +# Minimal version of Ninja required by this file + +ninja_required_version = 1.5 + + +############################################# +# Set configuration variable for custom commands. + +CONFIGURATION = Release +# ============================================================================= +# Include auxiliary files. + + +############################################# +# Include rules file. + +include CMakeFiles\rules.ninja + +# ============================================================================= + +############################################# +# Logical path to working directory; prefix for absolute paths. + +cmake_ninja_workdir = D$:\Projects\Replit$ 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build\ +# ============================================================================= +# Object build statements for SHARED_LIBRARY target ctransformers + + +############################################# +# Order-only phony target for ctransformers + +build cmake_object_order_depends_target_ctransformers: phony || CMakeFiles\ctransformers.dir + +build CMakeFiles\ctransformers.dir\models\llm.cc.obj: CXX_COMPILER__ctransformers_unscanned_Release D$:\Projects\Replit$ 3b\replit-3B-inference\ctransformers\models\llm.cc || cmake_object_order_depends_target_ctransformers + DEFINES = -DGGML_USE_K_QUANTS -Dctransformers_EXPORTS + FLAGS = /DWIN32 /D_WINDOWS /GR /EHsc /O2 /Ob2 /DNDEBUG -MD /arch:AVX2 + INCLUDES = -I"D:\Projects\Replit 3b\replit-3B-inference\ctransformers\models" + OBJECT_DIR = CMakeFiles\ctransformers.dir + OBJECT_FILE_DIR = CMakeFiles\ctransformers.dir\models + TARGET_COMPILE_PDB = CMakeFiles\ctransformers.dir\ + TARGET_PDB = lib\ctransformers.pdb + +build CMakeFiles\ctransformers.dir\models\ggml\ggml.c.obj: C_COMPILER__ctransformers_unscanned_Release D$:\Projects\Replit$ 3b\replit-3B-inference\ctransformers\models\ggml\ggml.c || cmake_object_order_depends_target_ctransformers + DEFINES = -DGGML_USE_K_QUANTS -Dctransformers_EXPORTS + FLAGS = /DWIN32 /D_WINDOWS /O2 /Ob2 /DNDEBUG -std:c11 -MD /arch:AVX2 + INCLUDES = -I"D:\Projects\Replit 3b\replit-3B-inference\ctransformers\models" + OBJECT_DIR = CMakeFiles\ctransformers.dir + OBJECT_FILE_DIR = CMakeFiles\ctransformers.dir\models\ggml + TARGET_COMPILE_PDB = CMakeFiles\ctransformers.dir\ + TARGET_PDB = lib\ctransformers.pdb + +build CMakeFiles\ctransformers.dir\models\ggml\k_quants.c.obj: C_COMPILER__ctransformers_unscanned_Release D$:\Projects\Replit$ 3b\replit-3B-inference\ctransformers\models\ggml\k_quants.c || cmake_object_order_depends_target_ctransformers + DEFINES = -DGGML_USE_K_QUANTS -Dctransformers_EXPORTS + FLAGS = /DWIN32 /D_WINDOWS /O2 /Ob2 /DNDEBUG -std:c11 -MD /arch:AVX2 + INCLUDES = -I"D:\Projects\Replit 3b\replit-3B-inference\ctransformers\models" + OBJECT_DIR = CMakeFiles\ctransformers.dir + OBJECT_FILE_DIR = CMakeFiles\ctransformers.dir\models\ggml + TARGET_COMPILE_PDB = CMakeFiles\ctransformers.dir\ + TARGET_PDB = lib\ctransformers.pdb + + +# ============================================================================= +# Link build statements for SHARED_LIBRARY target ctransformers + + +############################################# +# Link the shared library lib\ctransformers.dll + +build lib\ctransformers.dll ctransformers.lib: CXX_SHARED_LIBRARY_LINKER__ctransformers_Release CMakeFiles\ctransformers.dir\models\llm.cc.obj CMakeFiles\ctransformers.dir\models\ggml\ggml.c.obj CMakeFiles\ctransformers.dir\models\ggml\k_quants.c.obj + LANGUAGE_COMPILE_FLAGS = /DWIN32 /D_WINDOWS /GR /EHsc /O2 /Ob2 /DNDEBUG -MD + LINK_FLAGS = /machine:x64 /INCREMENTAL:NO /DEF:CMakeFiles\ctransformers.dir\.\exports.def + LINK_LIBRARIES = kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib + OBJECT_DIR = CMakeFiles\ctransformers.dir + POST_BUILD = cd . + PRE_LINK = cmd.exe /C "C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\bin\cmake.exe -E __create_def "D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build\CMakeFiles\ctransformers.dir\.\exports.def" "D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build\CMakeFiles\ctransformers.dir\.\exports.def.objs" && cd "D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build"" + RESTAT = 1 + TARGET_COMPILE_PDB = CMakeFiles\ctransformers.dir\ + TARGET_FILE = lib\ctransformers.dll + TARGET_IMPLIB = ctransformers.lib + TARGET_PDB = lib\ctransformers.pdb + + +############################################# +# Utility command for edit_cache + +build CMakeFiles\edit_cache.util: CUSTOM_COMMAND + COMMAND = cmd.exe /C "cd /D "D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build" && C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\bin\cmake-gui.exe -S"D:\Projects\Replit 3b\replit-3B-inference\ctransformers" -B"D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build"" + DESC = Running CMake cache editor... + pool = console + restat = 1 + +build edit_cache: phony CMakeFiles\edit_cache.util + + +############################################# +# Utility command for rebuild_cache + +build CMakeFiles\rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cmd.exe /C "cd /D "D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build" && C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\bin\cmake.exe --regenerate-during-build -S"D:\Projects\Replit 3b\replit-3B-inference\ctransformers" -B"D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build"" + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build rebuild_cache: phony CMakeFiles\rebuild_cache.util + + +############################################# +# Utility command for list_install_components + +build list_install_components: phony + + +############################################# +# Utility command for install + +build CMakeFiles\install.util: CUSTOM_COMMAND all + COMMAND = cmd.exe /C "cd /D "D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build" && C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\bin\cmake.exe -P cmake_install.cmake" + DESC = Install the project... + pool = console + restat = 1 + +build install: phony CMakeFiles\install.util + + +############################################# +# Utility command for install/local + +build CMakeFiles\install\local.util: CUSTOM_COMMAND all + COMMAND = cmd.exe /C "cd /D "D:\Projects\Replit 3b\replit-3B-inference\ctransformers\_skbuild\win-amd64-3.11\cmake-build" && C:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\bin\cmake.exe -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake" + DESC = Installing only the local directory... + pool = console + restat = 1 + +build install\local: phony CMakeFiles\install\local.util + +# ============================================================================= +# Target aliases. + +build ctransformers: phony lib\ctransformers.dll + +build ctransformers.dll: phony lib\ctransformers.dll + +# ============================================================================= +# Folder targets. + +# ============================================================================= + +############################################# +# Folder: D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build + +build all: phony lib\ctransformers.dll + +# ============================================================================= +# Built-in targets + + +############################################# +# Re-run CMake if any of its inputs changed. + +build build.ninja: RERUN_CMAKE | C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCCompiler.cmake.in C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCCompilerABI.c C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCXXCompiler.cmake.in C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCXXCompilerABI.cpp C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCXXInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCommonLanguageInclude.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCompilerIdDetection.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCXXCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCompileFeatures.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCompilerABI.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCompilerId.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineRCCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineSystem.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeFindBinUtils.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeGenericSystem.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeInitializeConfigs.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeLanguageInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeParseImplicitIncludeInfo.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeParseImplicitLinkInfo.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeParseLibraryArchitecture.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeRCCompiler.cmake.in C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeRCInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeSystem.cmake.in C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeSystemSpecificInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeSystemSpecificInitialize.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeTestCCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeTestCXXCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeTestCompilerCommon.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeTestRCCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CheckCSourceCompiles.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CheckIncludeFile.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CheckLibraryExists.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\ADSP-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\ARMCC-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\ARMClang-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\AppleClang-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Borland-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Bruce-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\CMakeCommonCompilerMacros.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Clang-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Clang-DetermineCompilerInternal.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Comeau-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Compaq-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Compaq-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Cray-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Embarcadero-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Fujitsu-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\FujitsuClang-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\GHS-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\GNU-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\GNU-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\HP-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\HP-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IAR-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IBMCPP-C-DetermineVersionInternal.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IBMCPP-CXX-DetermineVersionInternal.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IBMClang-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IBMClang-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Intel-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IntelLLVM-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\LCC-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\LCC-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\MSVC-C.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\MSVC-CXX.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\MSVC-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\MSVC.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\NVHPC-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\NVIDIA-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\OpenWatcom-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\PGI-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\PathScale-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\SCO-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\SDCC-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\SunPro-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\SunPro-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\TI-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Tasking-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\TinyCC-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\VisualAge-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\VisualAge-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Watcom-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\XL-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\XL-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\XLClang-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\XLClang-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\zOS-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\zOS-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\FindPackageHandleStandardArgs.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\FindPackageMessage.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\FindThreads.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Internal\CheckSourceCompiles.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Internal\FeatureTesting.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows-Determine-CXX.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows-MSVC-C.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows-MSVC-CXX.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows-MSVC.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\WindowsPaths.cmake CMakeCache.txt CMakeFiles\3.26.4\CMakeCCompiler.cmake CMakeFiles\3.26.4\CMakeCXXCompiler.cmake CMakeFiles\3.26.4\CMakeRCCompiler.cmake CMakeFiles\3.26.4\CMakeSystem.cmake D$:\Projects\Replit$ 3b\replit-3B-inference\ctransformers\CMakeLists.txt + pool = console + + +############################################# +# A missing CMake input file is not an error. + +build C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCCompiler.cmake.in C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCCompilerABI.c C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCXXCompiler.cmake.in C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCXXCompilerABI.cpp C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCXXInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCommonLanguageInclude.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeCompilerIdDetection.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCXXCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCompileFeatures.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCompilerABI.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineCompilerId.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineRCCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeDetermineSystem.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeFindBinUtils.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeGenericSystem.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeInitializeConfigs.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeLanguageInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeParseImplicitIncludeInfo.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeParseImplicitLinkInfo.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeParseLibraryArchitecture.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeRCCompiler.cmake.in C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeRCInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeSystem.cmake.in C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeSystemSpecificInformation.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeSystemSpecificInitialize.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeTestCCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeTestCXXCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeTestCompilerCommon.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CMakeTestRCCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CheckCSourceCompiles.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CheckIncludeFile.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\CheckLibraryExists.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\ADSP-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\ARMCC-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\ARMClang-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\AppleClang-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Borland-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Bruce-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\CMakeCommonCompilerMacros.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Clang-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Clang-DetermineCompilerInternal.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Comeau-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Compaq-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Compaq-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Cray-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Embarcadero-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Fujitsu-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\FujitsuClang-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\GHS-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\GNU-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\GNU-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\HP-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\HP-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IAR-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IBMCPP-C-DetermineVersionInternal.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IBMCPP-CXX-DetermineVersionInternal.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IBMClang-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IBMClang-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Intel-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\IntelLLVM-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\LCC-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\LCC-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\MSVC-C.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\MSVC-CXX.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\MSVC-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\MSVC.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\NVHPC-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\NVIDIA-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\OpenWatcom-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\PGI-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\PathScale-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\SCO-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\SDCC-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\SunPro-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\SunPro-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\TI-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Tasking-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\TinyCC-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\VisualAge-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\VisualAge-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\Watcom-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\XL-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\XL-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\XLClang-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\XLClang-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\zOS-C-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Compiler\zOS-CXX-DetermineCompiler.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\FindPackageHandleStandardArgs.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\FindPackageMessage.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\FindThreads.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Internal\CheckSourceCompiles.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Internal\FeatureTesting.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows-Determine-CXX.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows-MSVC-C.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows-MSVC-CXX.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows-MSVC.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\Windows.cmake C$:\Users\milin\AppData\Local\Temp\pip-build-env-nufd4ytl\overlay\Lib\site-packages\cmake\data\share\cmake-3.26\Modules\Platform\WindowsPaths.cmake CMakeCache.txt CMakeFiles\3.26.4\CMakeCCompiler.cmake CMakeFiles\3.26.4\CMakeCXXCompiler.cmake CMakeFiles\3.26.4\CMakeRCCompiler.cmake CMakeFiles\3.26.4\CMakeSystem.cmake D$:\Projects\Replit$ 3b\replit-3B-inference\ctransformers\CMakeLists.txt: phony + + +############################################# +# Clean all the built files. + +build clean: CLEAN + + +############################################# +# Print all primary targets available. + +build help: HELP + + +############################################# +# Make the all target the default. + +default all diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/cmake_install.cmake b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/cmake_install.cmake new file mode 100644 index 0000000000000000000000000000000000000000..b1550802387e62a6e19c1e1aaa4dce42d1afd988 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/cmake_install.cmake @@ -0,0 +1,48 @@ +# Install script for directory: D:/Projects/Replit 3b/replit-3B-inference/ctransformers + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-install") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "Release") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Is this installation the result of a crosscompile? +if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +if(CMAKE_INSTALL_COMPONENT STREQUAL "Unspecified" OR NOT CMAKE_INSTALL_COMPONENT) + file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/ctransformers/lib/local" TYPE SHARED_LIBRARY FILES "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/lib/ctransformers.dll") +endif() + +if(CMAKE_INSTALL_COMPONENT) + set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") +else() + set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") +endif() + +string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT + "${CMAKE_INSTALL_MANIFEST_FILES}") +file(WRITE "D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-build/${CMAKE_INSTALL_MANIFEST}" + "${CMAKE_INSTALL_MANIFEST_CONTENT}") diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/ctransformers.exp b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/ctransformers.exp new file mode 100644 index 0000000000000000000000000000000000000000..f2c36a9e8f12e5fba3facfa81e8fbaaa4e958e04 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/ctransformers.exp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca349b36bbff29118507644fde4ebf154092e961ba14e365b758e37a1e720708 +size 2668191 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/ctransformers.lib b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/ctransformers.lib new file mode 100644 index 0000000000000000000000000000000000000000..e8901483054e5930f179b8918364c917cc751749 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/ctransformers.lib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e01cf399be566d8314afd4647830626ffbebe4ced71c7ebafc419d934d7085f +size 4458470 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/install_manifest.txt b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/install_manifest.txt new file mode 100644 index 0000000000000000000000000000000000000000..a29b66f527b3e95e57614d07579e5adc8e629cef --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/install_manifest.txt @@ -0,0 +1 @@ +D:/Projects/Replit 3b/replit-3B-inference/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/local/ctransformers.dll \ No newline at end of file diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/lib/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/lib/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..c40e060659a5d729b536577c8c9965ad1c334370 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/lib/ctransformers.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b904a216aaa1cba77cfa1dad77bed395441a7ad53fdf4f701f99ca1129fa3cf +size 1616896 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-build/lib/ctransformers.dll.manifest b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/lib/ctransformers.dll.manifest new file mode 100644 index 0000000000000000000000000000000000000000..ecea6f7f567079e7ee7e10d661a92f5de67fdd55 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-build/lib/ctransformers.dll.manifest @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/__init__.py b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..48e85bf19d65722700878f3f993350a8938b9d64 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/__init__.py @@ -0,0 +1,2 @@ +from .llm import Config, LLM +from .hub import AutoConfig, AutoModelForCausalLM diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/hub.py b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/hub.py new file mode 100644 index 0000000000000000000000000000000000000000..86e3129f4a9057a3447f76dd44f59a3772a3321d --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/hub.py @@ -0,0 +1,215 @@ +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +from huggingface_hub import HfApi, snapshot_download +from huggingface_hub.utils import validate_repo_id, HFValidationError + +from .llm import Config, LLM + + +def get_path_type(path: str) -> Optional[str]: + p = Path(path) + if p.is_file(): + return "file" + elif p.is_dir(): + return "dir" + try: + validate_repo_id(path) + return "repo" + except HFValidationError: + pass + + +@dataclass +class AutoConfig: + config: Config + model_type: Optional[str] = None + + @classmethod + def from_pretrained( + cls, + model_path_or_repo_id: str, + local_files_only: bool = False, + **kwargs, + ) -> "AutoConfig": + path_type = get_path_type(model_path_or_repo_id) + if not path_type: + raise ValueError(f"Model path '{model_path_or_repo_id}' doesn't exist.") + + config = Config() + auto_config = AutoConfig(config=config) + + if path_type == "dir": + cls._update_from_dir(model_path_or_repo_id, auto_config) + elif path_type == "repo": + cls._update_from_repo( + model_path_or_repo_id, + auto_config, + local_files_only=local_files_only, + ) + + for k, v in kwargs.items(): + if not hasattr(config, k): + raise TypeError( + f"'{k}' is an invalid keyword argument for from_pretrained()" + ) + setattr(config, k, v) + + return auto_config + + @classmethod + def _update_from_repo( + cls, + repo_id: str, + auto_config: "AutoConfig", + local_files_only: bool, + ) -> None: + path = snapshot_download( + repo_id=repo_id, + allow_patterns="config.json", + local_files_only=local_files_only, + ) + cls._update_from_dir(path, auto_config) + + @classmethod + def _update_from_dir(cls, path: str, auto_config: "AutoConfig") -> None: + path = (Path(path) / "config.json").resolve() + if path.is_file(): + cls._update_from_file(path, auto_config) + + @classmethod + def _update_from_file(cls, path: str, auto_config: "AutoConfig") -> None: + with open(path) as f: + config = json.load(f) + + auto_config.model_type = config.get("model_type") + params = config.get("task_specific_params", {}) + params = params.get("text-generation", {}) + for name in [ + "top_k", + "top_p", + "temperature", + "repetition_penalty", + "last_n_tokens", + ]: + value = params.get(name) + if value is not None: + setattr(auto_config.config, name, value) + + +class AutoModelForCausalLM: + @classmethod + def from_pretrained( + cls, + model_path_or_repo_id: str, + *, + model_type: Optional[str] = None, + model_file: Optional[str] = None, + config: Optional[AutoConfig] = None, + lib: Optional[str] = None, + local_files_only: bool = False, + **kwargs, + ) -> LLM: + """Loads the language model from a local file or remote repo. + + Args: + model_path_or_repo_id: The path to a model file or directory or the + name of a Hugging Face Hub model repo. + model_type: The model type. + model_file: The name of the model file in repo or directory. + config: `AutoConfig` object. + lib: The path to a shared library or one of `avx2`, `avx`, `basic`. + local_files_only: Whether or not to only look at local files + (i.e., do not try to download the model). + + Returns: + `LLM` object. + """ + config = config or AutoConfig.from_pretrained( + model_path_or_repo_id, + local_files_only=local_files_only, + **kwargs, + ) + model_type = model_type or config.model_type + if not model_type: + raise ValueError( + "Unable to detect model type. Please specify a model type using:\n\n" + " AutoModelForCausalLM.from_pretrained(..., model_type='...')\n\n" + ) + + path_type = get_path_type(model_path_or_repo_id) + model_path = None + if path_type == "file": + model_path = model_path_or_repo_id + elif path_type == "dir": + model_path = cls._find_model_path_from_dir( + model_path_or_repo_id, model_file + ) + elif path_type == "repo": + model_path = cls._find_model_path_from_repo( + model_path_or_repo_id, + model_file, + local_files_only=local_files_only, + ) + + return LLM( + model_path=model_path, + model_type=model_type, + config=config.config, + lib=lib, + ) + + @classmethod + def _find_model_path_from_repo( + cls, + repo_id: str, + filename: Optional[str], + local_files_only: bool, + ) -> str: + if not filename and not local_files_only: + filename = cls._find_model_file_from_repo(repo_id=repo_id) + allow_patterns = filename or "*.bin" + path = snapshot_download( + repo_id=repo_id, + allow_patterns=allow_patterns, + local_files_only=local_files_only, + ) + return cls._find_model_path_from_dir(path, filename=filename) + + @classmethod + def _find_model_file_from_repo(cls, repo_id: str) -> Optional[str]: + api = HfApi() + repo_info = api.repo_info(repo_id=repo_id, files_metadata=True) + files = [ + (f.size, f.rfilename) + for f in repo_info.siblings + if f.rfilename.endswith(".bin") + ] + if not files: + raise ValueError(f"No model file found in repo '{repo_id}'") + return min(files)[1] + + @classmethod + def _find_model_path_from_dir( + cls, + path: str, + filename: Optional[str] = None, + ) -> str: + path = Path(path).resolve() + if filename: + file = (path / filename).resolve() + if not file.is_file(): + raise ValueError(f"Model file '{filename}' not found in '{path}'") + return str(file) + + files = [ + (f.stat().st_size, f) + for f in path.iterdir() + if f.is_file() and f.name.endswith(".bin") + ] + if not files: + raise ValueError(f"No model file found in directory '{path}'") + file = min(files)[1] + return str(file.resolve()) diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/langchain.py b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/langchain.py new file mode 100644 index 0000000000000000000000000000000000000000..eadb57b94089c2061776514e73fcabb8a3a151fc --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/langchain.py @@ -0,0 +1,89 @@ +try: + from langchain.llms.base import LLM +except ImportError: + raise ImportError( + "To use the ctransformers.langchain module, please install the " + "`langchain` python package: `pip install langchain`" + ) + +from typing import Any, Dict, Optional, Sequence + +from pydantic import root_validator +from langchain.callbacks.manager import CallbackManagerForLLMRun + +from ctransformers import AutoModelForCausalLM + + +class CTransformers(LLM): + """Wrapper around the C Transformers LLM interface. + + To use, you should have the `langchain` python package installed. + """ + + client: Any #: :meta private: + + model: str + """The path to a model file or directory or the name of a Hugging Face Hub + model repo.""" + + model_type: Optional[str] = None + """The model type.""" + + model_file: Optional[str] = None + """The name of the model file in repo or directory.""" + + config: Optional[Dict[str, Any]] = None + """The config parameters.""" + + lib: Optional[Any] = None + """The path to a shared library or one of `avx2`, `avx`, `basic`.""" + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Get the identifying parameters.""" + return { + "model": self.model, + "model_type": self.model_type, + "model_file": self.model_file, + "config": self.config, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "ctransformers" + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate and load model from a local file or remote repo.""" + config = values["config"] or {} + values["client"] = AutoModelForCausalLM.from_pretrained( + values["model"], + model_type=values["model_type"], + model_file=values["model_file"], + lib=values["lib"], + **config, + ) + return values + + def _call( + self, + prompt: str, + stop: Optional[Sequence[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + ) -> str: + """Generate text from a prompt. + + Args: + prompt: The prompt to generate text from. + stop: A list of sequences to stop generation when encountered. + + Returns: + The generated text. + """ + text = [] + for chunk in self.client(prompt, stop=stop, stream=True): + text.append(chunk) + if run_manager: + run_manager.on_llm_new_token(chunk, verbose=self.verbose) + return "".join(text) diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib.py b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib.py new file mode 100644 index 0000000000000000000000000000000000000000..c241453409dacdab7ad2e43304083a22760db01b --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib.py @@ -0,0 +1,42 @@ +from typing import Optional +import platform +from pathlib import Path + + +def find_library(path: Optional[str] = None) -> str: + lib_directory = Path(__file__).parent.resolve() / "lib" + + if path: + subdirs = [d.name for d in lib_directory.iterdir() if d.is_dir()] + if path not in subdirs: + return path + + system = platform.system() + if not path: + if (lib_directory / "local").is_dir(): + path = "local" + elif platform.processor() == "arm": + # Apple silicon doesn't support AVX/AVX2. + path = "basic" if system == "Darwin" else "" + else: + path = "avx2" + + name = "ctransformers" + if system == "Linux": + name = f"lib{name}.so" + elif system == "Windows": + name = f"{name}.dll" + elif system == "Darwin": + name = f"lib{name}.dylib" + else: + name = "" + + path = lib_directory / path / name + if not path.is_file(): + raise OSError( + "Precompiled binaries are not available for the current platform. " + "Please reinstall from source using:\n\n" + " pip uninstall ctransformers --yes\n" + " pip install ctransformers --no-binary ctransformers\n\n" + ) + return str(path) diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..cc7ed9bb6d6cb69146e15b0a51ea6807eae9974a Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/ctransformers.dll differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.dylib b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..3d9a5d6046f2a50ca4b5e54721ee0bf9e4e8e29f --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed063a9cd34374aec9c943b3330aaf476d40a116220fc49c871e50f868d024f +size 1797491 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.so b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..b2a8cf458bc04e9abe82a1ff5c1eefba5e88bce4 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb70a475777b52627511001d4fa2d2ea78d47ca9cb66920c42e31a80859b82e +size 1038856 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..280f4ee2c62d0c0cd8124382b4c4684494c749f1 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/ctransformers.dll differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.dylib b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..032ad8ddb4e43e46528c587bce491603dbddf9bd --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4197c6330368c2c55e8eaf802440a94232ccd59370925edc2fb1ee086acc8258 +size 1781107 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.so b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..60d3f48374116284b207d26773d6b28f789737f0 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56cd4a044bab0ca83e3474be8b5a3042a6a25b57916506fefdc5a8601305a6c5 +size 1042952 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..441d9acc4c6bd05af18a9b8bf881e3fdd1083ed8 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/ctransformers.dll differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.dylib b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..93dc0177132d6e591a70824bed794c276e7687fd --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a347ed7fd0b513e14d2d9c42a93b53f74cfb328c7b51add8e60a66a8f8c2839e +size 1781107 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.so b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..4109aa1b28665fc325f8a2dbb1c7fded2c7a289c Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.so differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/local/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/local/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..c40e060659a5d729b536577c8c9965ad1c334370 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/local/ctransformers.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b904a216aaa1cba77cfa1dad77bed395441a7ad53fdf4f701f99ca1129fa3cf +size 1616896 diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/llm.py b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/llm.py new file mode 100644 index 0000000000000000000000000000000000000000..aa4d9e0b7fdc0141dfe8cfe3f77836ee6a47d6b3 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/llm.py @@ -0,0 +1,584 @@ +import inspect +import os +import re +from collections import OrderedDict +from dataclasses import dataclass +from functools import partial +from pathlib import Path +from ctypes import ( + CDLL, + c_bool, + c_int, + c_float, + c_char_p, + c_void_p, + POINTER, +) +from typing import ( + Any, + Callable, + Generator, + List, + Optional, + Sequence, + Union, +) + +from .lib import find_library +from .utils import Vector, utf8_split_incomplete + +c_int_p = POINTER(c_int) +c_float_p = POINTER(c_float) +llm_p = c_void_p + + +@dataclass +class Config: + # sample + top_k: int = 40 + top_p: float = 0.95 + temperature: float = 0.8 + repetition_penalty: float = 1.1 + last_n_tokens: int = 64 + seed: int = -1 + + # eval + batch_size: int = 8 + threads: int = -1 + + # generate + max_new_tokens: int = 256 + stop: Optional[Sequence[str]] = None + stream: bool = False + reset: bool = True + + # model + context_length: int = -1 + gpu_layers: int = 0 + + +docs = OrderedDict( + top_k="The top-k value to use for sampling.", + top_p="The top-p value to use for sampling.", + temperature="The temperature to use for sampling.", + repetition_penalty="The repetition penalty to use for sampling.", + last_n_tokens="The number of last tokens to use for repetition penalty.", + seed="The seed value to use for sampling tokens.", + max_new_tokens="The maximum number of new tokens to generate.", + stop="A list of sequences to stop generation when encountered.", + stream="Whether to stream the generated text.", + reset="Whether to reset the model state before generating text.", + batch_size="The batch size to use for evaluating tokens.", + threads="The number of threads to use for evaluating tokens.", + context_length="The maximum context length to use.", + gpu_layers="The number of layers to run on GPU.", +) + + +def doc(fn): + doc = [] + for param in inspect.signature(fn).parameters: + if param in docs: + default = getattr(Config, param) + doc.append(f"{param}: {docs[param]} Default: `{default}`") + doc = ("\n" + " " * 12).join(doc) + fn.__doc__ = fn.__doc__.format(params=doc) + return fn + + +def get(*values): + for value in values: + if value is not None: + return value + + +def load_library(path: Optional[str] = None) -> Any: + # https://docs.python.org/3.8/whatsnew/3.8.html#bpo-36085-whatsnew + # https://github.com/abetlen/llama-cpp-python/pull/225 + if hasattr(os, "add_dll_directory") and "CUDA_PATH" in os.environ: + os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin")) + + path = find_library(path) + lib = CDLL(path) + + lib.ctransformers_llm_create.argtypes = [ + c_char_p, # model_path + c_char_p, # model_type + c_int, # context_length + c_int, # gpu_layers + ] + lib.ctransformers_llm_create.restype = llm_p + + lib.ctransformers_llm_delete.argtypes = [llm_p] + lib.ctransformers_llm_delete.restype = None + + lib.ctransformers_llm_tokenize.argtypes = [ + llm_p, + c_char_p, # text + c_int_p, # output + ] + lib.ctransformers_llm_tokenize.restype = c_int + + lib.ctransformers_llm_detokenize.argtypes = [ + llm_p, + c_int, # token + ] + lib.ctransformers_llm_detokenize.restype = c_char_p + + lib.ctransformers_llm_is_eos_token.argtypes = [ + llm_p, + c_int, # token + ] + lib.ctransformers_llm_is_eos_token.restype = c_bool + + lib.ctransformers_llm_eos_token_id.argtypes = [llm_p] + lib.ctransformers_llm_eos_token_id.restype = c_int + + lib.ctransformers_llm_vocab_size.argtypes = [llm_p] + lib.ctransformers_llm_vocab_size.restype = c_int + + lib.ctransformers_llm_context_length.argtypes = [llm_p] + lib.ctransformers_llm_context_length.restype = c_int + + lib.ctransformers_llm_batch_eval.argtypes = [ + llm_p, + c_int_p, # tokens + c_int, # n_tokens + c_int, # batch_size + c_int, # threads + ] + lib.ctransformers_llm_batch_eval.restype = c_bool + + lib.ctransformers_llm_logits_data.argtypes = [llm_p] + lib.ctransformers_llm_logits_data.restype = c_float_p + lib.ctransformers_llm_logits_size.argtypes = [llm_p] + lib.ctransformers_llm_logits_size.restype = c_int + + lib.ctransformers_llm_embeddings_data.argtypes = [llm_p] + lib.ctransformers_llm_embeddings_data.restype = c_float_p + lib.ctransformers_llm_embeddings_size.argtypes = [llm_p] + lib.ctransformers_llm_embeddings_size.restype = c_int + + lib.ctransformers_llm_sample.argtypes = [ + llm_p, + c_int, # top_k + c_float, # top_p + c_float, # temperature + c_float, # repetition_penalty + c_int, # last_n_tokens + c_int, # seed + ] + lib.ctransformers_llm_sample.restype = c_int + + lib.ctransformers_llm_reset.argtypes = [llm_p] + lib.ctransformers_llm_reset.restype = None + + return lib + + +class LLM: + def __init__( + self, + model_path: str, + model_type: str, + *, + config: Optional[Config] = None, + lib: Optional[str] = None, + ): + """Loads the language model from a local file. + + Args: + model_path: The path to a model file. + model_type: The model type. + config: `Config` object. + lib: The path to a shared library or one of `avx2`, `avx`, `basic`. + """ + config = config or Config() + self._model_path = model_path + self._model_type = model_type + self._config = config + self._llm = None + self._lib = None + + if not Path(model_path).is_file(): + raise ValueError(f"Model path '{model_path}' doesn't exist.") + + self._lib = load_library(lib) + self._llm = self._lib.ctransformers_llm_create( + model_path.encode(), + model_type.encode(), + config.context_length, + config.gpu_layers, + ) + if self._llm is None: + raise RuntimeError( + f"Failed to create LLM '{model_type}' from '{model_path}'." + ) + + @property + def model_path(self) -> str: + """The path to the model file.""" + return self._model_path + + @property + def model_type(self) -> str: + """The model type.""" + return self._model_type + + @property + def config(self) -> Config: + """The config object.""" + return self._config + + @property + def eos_token_id(self) -> int: + """The end-of-sequence token.""" + return self.ctransformers_llm_eos_token_id() + + @property + def vocab_size(self) -> int: + """The number of tokens in vocabulary.""" + return self.ctransformers_llm_vocab_size() + + @property + def context_length(self) -> int: + """The context length of model.""" + return self.ctransformers_llm_context_length() + + @property + def logits(self) -> List[float]: + """The unnormalized log probabilities.""" + return Vector( + self.ctransformers_llm_logits_data(), + self.ctransformers_llm_logits_size(), + ) + + @property + def embeddings(self) -> List[float]: + """The input embeddings.""" + return Vector( + self.ctransformers_llm_embeddings_data(), + self.ctransformers_llm_embeddings_size(), + ) + + def __getattr__(self, name: str) -> Callable: + lib, llm = self._lib, self._llm + if name.startswith("ctransformers_llm_") and hasattr(lib, name): + return partial(getattr(lib, name), llm) + raise AttributeError(f"'LLM' object has no attribute '{name}'") + + def tokenize(self, text: str) -> List[int]: + """Converts a text into list of tokens. + + Args: + text: The text to tokenize. + + Returns: + The list of tokens. + """ + tokens = (c_int * (len(text) + 1))() + n_tokens = self.ctransformers_llm_tokenize(text.encode(), tokens) + return tokens[:n_tokens] + + def detokenize( + self, + tokens: Sequence[int], + decode: bool = True, + ) -> Union[str, bytes]: + """Converts a list of tokens to text. + + Args: + tokens: The list of tokens. + decode: Whether to decode the text as UTF-8 string. + + Returns: + The combined text of all tokens. + """ + if isinstance(tokens, int): + tokens = [tokens] + texts = [] + for token in tokens: + text = self.ctransformers_llm_detokenize(token) + texts.append(text) + texts = b"".join(texts) + if decode: + texts = texts.decode(errors="ignore") + return texts + + def is_eos_token(self, token: int) -> bool: + """Checks if a token is an end-of-sequence token. + + Args: + token: The token to check. + + Returns: + `True` if the token is an end-of-sequence token else `False`. + """ + return self.ctransformers_llm_is_eos_token(token) + + @doc + def eval( + self, + tokens: Sequence[int], + *, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + ) -> None: + """Evaluates a list of tokens. + + Args: + tokens: The list of tokens to evaluate. + {params} + """ + config = self.config + batch_size = get(batch_size, config.batch_size) + threads = get(threads, config.threads) + + n_tokens = len(tokens) + tokens = (c_int * n_tokens)(*tokens) + status = self.ctransformers_llm_batch_eval( + tokens, + n_tokens, + batch_size, + threads, + ) + if not status: + raise RuntimeError("Failed to evaluate tokens.") + + @doc + def sample( + self, + *, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + ) -> int: + """Samples a token from the model. + + Args: + {params} + + Returns: + The sampled token. + """ + config = self.config + top_k = get(top_k, config.top_k) + top_p = get(top_p, config.top_p) + temperature = get(temperature, config.temperature) + repetition_penalty = get(repetition_penalty, config.repetition_penalty) + last_n_tokens = get(last_n_tokens, config.last_n_tokens) + seed = get(seed, config.seed) + + return self.ctransformers_llm_sample( + top_k, + top_p, + temperature, + repetition_penalty, + last_n_tokens, + seed, + ) + + def reset(self) -> None: + """Resets the model state.""" + self.ctransformers_llm_reset() + + def __del__(self): + if self._llm is not None: + self.ctransformers_llm_delete() + + @doc + def generate( + self, + tokens: Sequence[int], + *, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + reset: Optional[bool] = None, + ) -> Generator[int, None, None]: + """Generates new tokens from a list of tokens. + + Args: + tokens: The list of tokens to generate tokens from. + {params} + + Returns: + The generated tokens. + """ + config = self.config + reset = get(reset, config.reset) + + if reset: + self.reset() + + self.eval(tokens, batch_size=batch_size, threads=threads) + while True: + token = self.sample( + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + ) + self.eval([token], batch_size=batch_size, threads=threads) + if self.is_eos_token(token): + break + yield token + + def _stream( + self, + prompt: str, + *, + max_new_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + stop: Optional[Sequence[str]] = None, + reset: Optional[bool] = None, + ) -> Generator[str, None, None]: + config = self.config + max_new_tokens = get(max_new_tokens, config.max_new_tokens) + stop = get(stop, config.stop) or [] + if isinstance(stop, str): + stop = [stop] + + tokens = self.tokenize(prompt) + + stop_regex = re.compile("|".join(map(re.escape, stop))) + count = 0 + text = "" + incomplete = b"" + for token in self.generate( + tokens, + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + batch_size=batch_size, + threads=threads, + reset=reset, + ): + # Handle incomplete UTF-8 multi-byte characters. + incomplete += self.detokenize([token], decode=False) + complete, incomplete = utf8_split_incomplete(incomplete) + text += complete.decode(errors="ignore") + + # https://github.com/abetlen/llama-cpp-python/blob/1a13d76c487df1c8560132d10bda62d6e2f4fa93/llama_cpp/llama.py#L686-L706 + # Check if one of the stop sequences is part of the text. + # Note that the stop sequence may not always be at the end of text. + if stop: + match = stop_regex.search(text) + if match: + text = text[: match.start()] + break + + # Avoid sending the longest suffix of text which is also a prefix + # of a stop sequence, as it can form a stop sequence with the text + # generated later. + longest = 0 + for s in stop: + for i in range(len(s), 0, -1): + if text.endswith(s[:i]): + longest = max(i, longest) + break + + end = len(text) - longest + if end > 0: + yield text[:end] + text = text[end:] + + count += 1 + if count >= max_new_tokens: + break + + if text: + yield text + + @doc + def __call__( + self, + prompt: str, + *, + max_new_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + stop: Optional[Sequence[str]] = None, + stream: Optional[bool] = None, + reset: Optional[bool] = None, + ) -> Union[str, Generator[str, None, None]]: + """Generates text from a prompt. + + Args: + prompt: The prompt to generate text from. + {params} + + Returns: + The generated text. + """ + config = self.config + stream = get(stream, config.stream) + + text = self._stream( + prompt, + max_new_tokens=max_new_tokens, + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + batch_size=batch_size, + threads=threads, + stop=stop, + reset=reset, + ) + if stream: + return text + return "".join(text) + + @doc + def embed( + self, + input: Union[str, Sequence[int]], + *, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + ) -> List[float]: + """Computes embeddings for a text or list of tokens. + + > **Note:** Currently only LLaMA models support embeddings. + + Args: + input: The input text or list of tokens to get embeddings for. + {params} + + Returns: + The input embeddings. + """ + if isinstance(input, str): + input = self.tokenize(input) + self.reset() + self.eval(input, batch_size=batch_size, threads=threads) + return list(self.embeddings) diff --git a/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/utils.py b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..894e4da0d1e29fbaad06e10554edc2e99e65126b --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/cmake-install/ctransformers/utils.py @@ -0,0 +1,48 @@ +from collections.abc import MutableSequence +from typing import Any, Tuple + + +class Vector(MutableSequence): + """Provides a Python list-like interface for a C array to access and modify + data in-place without copying the entire data between C and Python. + """ + + def __init__(self, data: Any, size: int): + self._data = data + self._size = size + + def __getitem__(self, index: int) -> Any: + self._validate_index(index) + return self._data[index] + + def __setitem__(self, index: int, value: Any) -> None: + self._validate_index(index) + self._data[index] = value + + def __len__(self) -> int: + return self._size + + def _validate_index(self, index: int) -> None: + if not isinstance(index, int): + raise TypeError("list index must be integer") + if not 0 <= index < self._size: + raise IndexError("list index out of range") + + def __delitem__(self, index: int) -> None: + raise NotImplementedError("This operation is not allowed.") + + def insert(self, index: int, value: Any) -> None: + raise NotImplementedError("This operation is not allowed.") + + +def utf8_is_continuation_byte(byte: int) -> bool: + """Checks if a byte is a UTF-8 continuation byte (most significant bit is 1).""" + return (byte & 0b10000000) != 0 + + +def utf8_split_incomplete(seq: bytes) -> Tuple[bytes, bytes]: + """Splits a sequence of UTF-8 encoded bytes into complete and incomplete bytes.""" + i = len(seq) + while i > 0 and utf8_is_continuation_byte(seq[i - 1]): + i -= 1 + return seq[:i], seq[i:] diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/__init__.py b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..48e85bf19d65722700878f3f993350a8938b9d64 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/__init__.py @@ -0,0 +1,2 @@ +from .llm import Config, LLM +from .hub import AutoConfig, AutoModelForCausalLM diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/hub.py b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/hub.py new file mode 100644 index 0000000000000000000000000000000000000000..86e3129f4a9057a3447f76dd44f59a3772a3321d --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/hub.py @@ -0,0 +1,215 @@ +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +from huggingface_hub import HfApi, snapshot_download +from huggingface_hub.utils import validate_repo_id, HFValidationError + +from .llm import Config, LLM + + +def get_path_type(path: str) -> Optional[str]: + p = Path(path) + if p.is_file(): + return "file" + elif p.is_dir(): + return "dir" + try: + validate_repo_id(path) + return "repo" + except HFValidationError: + pass + + +@dataclass +class AutoConfig: + config: Config + model_type: Optional[str] = None + + @classmethod + def from_pretrained( + cls, + model_path_or_repo_id: str, + local_files_only: bool = False, + **kwargs, + ) -> "AutoConfig": + path_type = get_path_type(model_path_or_repo_id) + if not path_type: + raise ValueError(f"Model path '{model_path_or_repo_id}' doesn't exist.") + + config = Config() + auto_config = AutoConfig(config=config) + + if path_type == "dir": + cls._update_from_dir(model_path_or_repo_id, auto_config) + elif path_type == "repo": + cls._update_from_repo( + model_path_or_repo_id, + auto_config, + local_files_only=local_files_only, + ) + + for k, v in kwargs.items(): + if not hasattr(config, k): + raise TypeError( + f"'{k}' is an invalid keyword argument for from_pretrained()" + ) + setattr(config, k, v) + + return auto_config + + @classmethod + def _update_from_repo( + cls, + repo_id: str, + auto_config: "AutoConfig", + local_files_only: bool, + ) -> None: + path = snapshot_download( + repo_id=repo_id, + allow_patterns="config.json", + local_files_only=local_files_only, + ) + cls._update_from_dir(path, auto_config) + + @classmethod + def _update_from_dir(cls, path: str, auto_config: "AutoConfig") -> None: + path = (Path(path) / "config.json").resolve() + if path.is_file(): + cls._update_from_file(path, auto_config) + + @classmethod + def _update_from_file(cls, path: str, auto_config: "AutoConfig") -> None: + with open(path) as f: + config = json.load(f) + + auto_config.model_type = config.get("model_type") + params = config.get("task_specific_params", {}) + params = params.get("text-generation", {}) + for name in [ + "top_k", + "top_p", + "temperature", + "repetition_penalty", + "last_n_tokens", + ]: + value = params.get(name) + if value is not None: + setattr(auto_config.config, name, value) + + +class AutoModelForCausalLM: + @classmethod + def from_pretrained( + cls, + model_path_or_repo_id: str, + *, + model_type: Optional[str] = None, + model_file: Optional[str] = None, + config: Optional[AutoConfig] = None, + lib: Optional[str] = None, + local_files_only: bool = False, + **kwargs, + ) -> LLM: + """Loads the language model from a local file or remote repo. + + Args: + model_path_or_repo_id: The path to a model file or directory or the + name of a Hugging Face Hub model repo. + model_type: The model type. + model_file: The name of the model file in repo or directory. + config: `AutoConfig` object. + lib: The path to a shared library or one of `avx2`, `avx`, `basic`. + local_files_only: Whether or not to only look at local files + (i.e., do not try to download the model). + + Returns: + `LLM` object. + """ + config = config or AutoConfig.from_pretrained( + model_path_or_repo_id, + local_files_only=local_files_only, + **kwargs, + ) + model_type = model_type or config.model_type + if not model_type: + raise ValueError( + "Unable to detect model type. Please specify a model type using:\n\n" + " AutoModelForCausalLM.from_pretrained(..., model_type='...')\n\n" + ) + + path_type = get_path_type(model_path_or_repo_id) + model_path = None + if path_type == "file": + model_path = model_path_or_repo_id + elif path_type == "dir": + model_path = cls._find_model_path_from_dir( + model_path_or_repo_id, model_file + ) + elif path_type == "repo": + model_path = cls._find_model_path_from_repo( + model_path_or_repo_id, + model_file, + local_files_only=local_files_only, + ) + + return LLM( + model_path=model_path, + model_type=model_type, + config=config.config, + lib=lib, + ) + + @classmethod + def _find_model_path_from_repo( + cls, + repo_id: str, + filename: Optional[str], + local_files_only: bool, + ) -> str: + if not filename and not local_files_only: + filename = cls._find_model_file_from_repo(repo_id=repo_id) + allow_patterns = filename or "*.bin" + path = snapshot_download( + repo_id=repo_id, + allow_patterns=allow_patterns, + local_files_only=local_files_only, + ) + return cls._find_model_path_from_dir(path, filename=filename) + + @classmethod + def _find_model_file_from_repo(cls, repo_id: str) -> Optional[str]: + api = HfApi() + repo_info = api.repo_info(repo_id=repo_id, files_metadata=True) + files = [ + (f.size, f.rfilename) + for f in repo_info.siblings + if f.rfilename.endswith(".bin") + ] + if not files: + raise ValueError(f"No model file found in repo '{repo_id}'") + return min(files)[1] + + @classmethod + def _find_model_path_from_dir( + cls, + path: str, + filename: Optional[str] = None, + ) -> str: + path = Path(path).resolve() + if filename: + file = (path / filename).resolve() + if not file.is_file(): + raise ValueError(f"Model file '{filename}' not found in '{path}'") + return str(file) + + files = [ + (f.stat().st_size, f) + for f in path.iterdir() + if f.is_file() and f.name.endswith(".bin") + ] + if not files: + raise ValueError(f"No model file found in directory '{path}'") + file = min(files)[1] + return str(file.resolve()) diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/langchain.py b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/langchain.py new file mode 100644 index 0000000000000000000000000000000000000000..eadb57b94089c2061776514e73fcabb8a3a151fc --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/langchain.py @@ -0,0 +1,89 @@ +try: + from langchain.llms.base import LLM +except ImportError: + raise ImportError( + "To use the ctransformers.langchain module, please install the " + "`langchain` python package: `pip install langchain`" + ) + +from typing import Any, Dict, Optional, Sequence + +from pydantic import root_validator +from langchain.callbacks.manager import CallbackManagerForLLMRun + +from ctransformers import AutoModelForCausalLM + + +class CTransformers(LLM): + """Wrapper around the C Transformers LLM interface. + + To use, you should have the `langchain` python package installed. + """ + + client: Any #: :meta private: + + model: str + """The path to a model file or directory or the name of a Hugging Face Hub + model repo.""" + + model_type: Optional[str] = None + """The model type.""" + + model_file: Optional[str] = None + """The name of the model file in repo or directory.""" + + config: Optional[Dict[str, Any]] = None + """The config parameters.""" + + lib: Optional[Any] = None + """The path to a shared library or one of `avx2`, `avx`, `basic`.""" + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Get the identifying parameters.""" + return { + "model": self.model, + "model_type": self.model_type, + "model_file": self.model_file, + "config": self.config, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "ctransformers" + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate and load model from a local file or remote repo.""" + config = values["config"] or {} + values["client"] = AutoModelForCausalLM.from_pretrained( + values["model"], + model_type=values["model_type"], + model_file=values["model_file"], + lib=values["lib"], + **config, + ) + return values + + def _call( + self, + prompt: str, + stop: Optional[Sequence[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + ) -> str: + """Generate text from a prompt. + + Args: + prompt: The prompt to generate text from. + stop: A list of sequences to stop generation when encountered. + + Returns: + The generated text. + """ + text = [] + for chunk in self.client(prompt, stop=stop, stream=True): + text.append(chunk) + if run_manager: + run_manager.on_llm_new_token(chunk, verbose=self.verbose) + return "".join(text) diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib.py b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib.py new file mode 100644 index 0000000000000000000000000000000000000000..c241453409dacdab7ad2e43304083a22760db01b --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib.py @@ -0,0 +1,42 @@ +from typing import Optional +import platform +from pathlib import Path + + +def find_library(path: Optional[str] = None) -> str: + lib_directory = Path(__file__).parent.resolve() / "lib" + + if path: + subdirs = [d.name for d in lib_directory.iterdir() if d.is_dir()] + if path not in subdirs: + return path + + system = platform.system() + if not path: + if (lib_directory / "local").is_dir(): + path = "local" + elif platform.processor() == "arm": + # Apple silicon doesn't support AVX/AVX2. + path = "basic" if system == "Darwin" else "" + else: + path = "avx2" + + name = "ctransformers" + if system == "Linux": + name = f"lib{name}.so" + elif system == "Windows": + name = f"{name}.dll" + elif system == "Darwin": + name = f"lib{name}.dylib" + else: + name = "" + + path = lib_directory / path / name + if not path.is_file(): + raise OSError( + "Precompiled binaries are not available for the current platform. " + "Please reinstall from source using:\n\n" + " pip uninstall ctransformers --yes\n" + " pip install ctransformers --no-binary ctransformers\n\n" + ) + return str(path) diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..cc7ed9bb6d6cb69146e15b0a51ea6807eae9974a Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/ctransformers.dll differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/libctransformers.dylib b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..3d9a5d6046f2a50ca4b5e54721ee0bf9e4e8e29f --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed063a9cd34374aec9c943b3330aaf476d40a116220fc49c871e50f868d024f +size 1797491 diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/libctransformers.so b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..b2a8cf458bc04e9abe82a1ff5c1eefba5e88bce4 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx/libctransformers.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb70a475777b52627511001d4fa2d2ea78d47ca9cb66920c42e31a80859b82e +size 1038856 diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..280f4ee2c62d0c0cd8124382b4c4684494c749f1 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/ctransformers.dll differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/libctransformers.dylib b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..032ad8ddb4e43e46528c587bce491603dbddf9bd --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4197c6330368c2c55e8eaf802440a94232ccd59370925edc2fb1ee086acc8258 +size 1781107 diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/libctransformers.so b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..60d3f48374116284b207d26773d6b28f789737f0 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/avx2/libctransformers.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56cd4a044bab0ca83e3474be8b5a3042a6a25b57916506fefdc5a8601305a6c5 +size 1042952 diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..441d9acc4c6bd05af18a9b8bf881e3fdd1083ed8 Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/ctransformers.dll differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/libctransformers.dylib b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..93dc0177132d6e591a70824bed794c276e7687fd --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a347ed7fd0b513e14d2d9c42a93b53f74cfb328c7b51add8e60a66a8f8c2839e +size 1781107 diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/libctransformers.so b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..4109aa1b28665fc325f8a2dbb1c7fded2c7a289c Binary files /dev/null and b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/basic/libctransformers.so differ diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/local/ctransformers.dll b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/local/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..c40e060659a5d729b536577c8c9965ad1c334370 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/lib/local/ctransformers.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b904a216aaa1cba77cfa1dad77bed395441a7ad53fdf4f701f99ca1129fa3cf +size 1616896 diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/llm.py b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/llm.py new file mode 100644 index 0000000000000000000000000000000000000000..aa4d9e0b7fdc0141dfe8cfe3f77836ee6a47d6b3 --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/llm.py @@ -0,0 +1,584 @@ +import inspect +import os +import re +from collections import OrderedDict +from dataclasses import dataclass +from functools import partial +from pathlib import Path +from ctypes import ( + CDLL, + c_bool, + c_int, + c_float, + c_char_p, + c_void_p, + POINTER, +) +from typing import ( + Any, + Callable, + Generator, + List, + Optional, + Sequence, + Union, +) + +from .lib import find_library +from .utils import Vector, utf8_split_incomplete + +c_int_p = POINTER(c_int) +c_float_p = POINTER(c_float) +llm_p = c_void_p + + +@dataclass +class Config: + # sample + top_k: int = 40 + top_p: float = 0.95 + temperature: float = 0.8 + repetition_penalty: float = 1.1 + last_n_tokens: int = 64 + seed: int = -1 + + # eval + batch_size: int = 8 + threads: int = -1 + + # generate + max_new_tokens: int = 256 + stop: Optional[Sequence[str]] = None + stream: bool = False + reset: bool = True + + # model + context_length: int = -1 + gpu_layers: int = 0 + + +docs = OrderedDict( + top_k="The top-k value to use for sampling.", + top_p="The top-p value to use for sampling.", + temperature="The temperature to use for sampling.", + repetition_penalty="The repetition penalty to use for sampling.", + last_n_tokens="The number of last tokens to use for repetition penalty.", + seed="The seed value to use for sampling tokens.", + max_new_tokens="The maximum number of new tokens to generate.", + stop="A list of sequences to stop generation when encountered.", + stream="Whether to stream the generated text.", + reset="Whether to reset the model state before generating text.", + batch_size="The batch size to use for evaluating tokens.", + threads="The number of threads to use for evaluating tokens.", + context_length="The maximum context length to use.", + gpu_layers="The number of layers to run on GPU.", +) + + +def doc(fn): + doc = [] + for param in inspect.signature(fn).parameters: + if param in docs: + default = getattr(Config, param) + doc.append(f"{param}: {docs[param]} Default: `{default}`") + doc = ("\n" + " " * 12).join(doc) + fn.__doc__ = fn.__doc__.format(params=doc) + return fn + + +def get(*values): + for value in values: + if value is not None: + return value + + +def load_library(path: Optional[str] = None) -> Any: + # https://docs.python.org/3.8/whatsnew/3.8.html#bpo-36085-whatsnew + # https://github.com/abetlen/llama-cpp-python/pull/225 + if hasattr(os, "add_dll_directory") and "CUDA_PATH" in os.environ: + os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin")) + + path = find_library(path) + lib = CDLL(path) + + lib.ctransformers_llm_create.argtypes = [ + c_char_p, # model_path + c_char_p, # model_type + c_int, # context_length + c_int, # gpu_layers + ] + lib.ctransformers_llm_create.restype = llm_p + + lib.ctransformers_llm_delete.argtypes = [llm_p] + lib.ctransformers_llm_delete.restype = None + + lib.ctransformers_llm_tokenize.argtypes = [ + llm_p, + c_char_p, # text + c_int_p, # output + ] + lib.ctransformers_llm_tokenize.restype = c_int + + lib.ctransformers_llm_detokenize.argtypes = [ + llm_p, + c_int, # token + ] + lib.ctransformers_llm_detokenize.restype = c_char_p + + lib.ctransformers_llm_is_eos_token.argtypes = [ + llm_p, + c_int, # token + ] + lib.ctransformers_llm_is_eos_token.restype = c_bool + + lib.ctransformers_llm_eos_token_id.argtypes = [llm_p] + lib.ctransformers_llm_eos_token_id.restype = c_int + + lib.ctransformers_llm_vocab_size.argtypes = [llm_p] + lib.ctransformers_llm_vocab_size.restype = c_int + + lib.ctransformers_llm_context_length.argtypes = [llm_p] + lib.ctransformers_llm_context_length.restype = c_int + + lib.ctransformers_llm_batch_eval.argtypes = [ + llm_p, + c_int_p, # tokens + c_int, # n_tokens + c_int, # batch_size + c_int, # threads + ] + lib.ctransformers_llm_batch_eval.restype = c_bool + + lib.ctransformers_llm_logits_data.argtypes = [llm_p] + lib.ctransformers_llm_logits_data.restype = c_float_p + lib.ctransformers_llm_logits_size.argtypes = [llm_p] + lib.ctransformers_llm_logits_size.restype = c_int + + lib.ctransformers_llm_embeddings_data.argtypes = [llm_p] + lib.ctransformers_llm_embeddings_data.restype = c_float_p + lib.ctransformers_llm_embeddings_size.argtypes = [llm_p] + lib.ctransformers_llm_embeddings_size.restype = c_int + + lib.ctransformers_llm_sample.argtypes = [ + llm_p, + c_int, # top_k + c_float, # top_p + c_float, # temperature + c_float, # repetition_penalty + c_int, # last_n_tokens + c_int, # seed + ] + lib.ctransformers_llm_sample.restype = c_int + + lib.ctransformers_llm_reset.argtypes = [llm_p] + lib.ctransformers_llm_reset.restype = None + + return lib + + +class LLM: + def __init__( + self, + model_path: str, + model_type: str, + *, + config: Optional[Config] = None, + lib: Optional[str] = None, + ): + """Loads the language model from a local file. + + Args: + model_path: The path to a model file. + model_type: The model type. + config: `Config` object. + lib: The path to a shared library or one of `avx2`, `avx`, `basic`. + """ + config = config or Config() + self._model_path = model_path + self._model_type = model_type + self._config = config + self._llm = None + self._lib = None + + if not Path(model_path).is_file(): + raise ValueError(f"Model path '{model_path}' doesn't exist.") + + self._lib = load_library(lib) + self._llm = self._lib.ctransformers_llm_create( + model_path.encode(), + model_type.encode(), + config.context_length, + config.gpu_layers, + ) + if self._llm is None: + raise RuntimeError( + f"Failed to create LLM '{model_type}' from '{model_path}'." + ) + + @property + def model_path(self) -> str: + """The path to the model file.""" + return self._model_path + + @property + def model_type(self) -> str: + """The model type.""" + return self._model_type + + @property + def config(self) -> Config: + """The config object.""" + return self._config + + @property + def eos_token_id(self) -> int: + """The end-of-sequence token.""" + return self.ctransformers_llm_eos_token_id() + + @property + def vocab_size(self) -> int: + """The number of tokens in vocabulary.""" + return self.ctransformers_llm_vocab_size() + + @property + def context_length(self) -> int: + """The context length of model.""" + return self.ctransformers_llm_context_length() + + @property + def logits(self) -> List[float]: + """The unnormalized log probabilities.""" + return Vector( + self.ctransformers_llm_logits_data(), + self.ctransformers_llm_logits_size(), + ) + + @property + def embeddings(self) -> List[float]: + """The input embeddings.""" + return Vector( + self.ctransformers_llm_embeddings_data(), + self.ctransformers_llm_embeddings_size(), + ) + + def __getattr__(self, name: str) -> Callable: + lib, llm = self._lib, self._llm + if name.startswith("ctransformers_llm_") and hasattr(lib, name): + return partial(getattr(lib, name), llm) + raise AttributeError(f"'LLM' object has no attribute '{name}'") + + def tokenize(self, text: str) -> List[int]: + """Converts a text into list of tokens. + + Args: + text: The text to tokenize. + + Returns: + The list of tokens. + """ + tokens = (c_int * (len(text) + 1))() + n_tokens = self.ctransformers_llm_tokenize(text.encode(), tokens) + return tokens[:n_tokens] + + def detokenize( + self, + tokens: Sequence[int], + decode: bool = True, + ) -> Union[str, bytes]: + """Converts a list of tokens to text. + + Args: + tokens: The list of tokens. + decode: Whether to decode the text as UTF-8 string. + + Returns: + The combined text of all tokens. + """ + if isinstance(tokens, int): + tokens = [tokens] + texts = [] + for token in tokens: + text = self.ctransformers_llm_detokenize(token) + texts.append(text) + texts = b"".join(texts) + if decode: + texts = texts.decode(errors="ignore") + return texts + + def is_eos_token(self, token: int) -> bool: + """Checks if a token is an end-of-sequence token. + + Args: + token: The token to check. + + Returns: + `True` if the token is an end-of-sequence token else `False`. + """ + return self.ctransformers_llm_is_eos_token(token) + + @doc + def eval( + self, + tokens: Sequence[int], + *, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + ) -> None: + """Evaluates a list of tokens. + + Args: + tokens: The list of tokens to evaluate. + {params} + """ + config = self.config + batch_size = get(batch_size, config.batch_size) + threads = get(threads, config.threads) + + n_tokens = len(tokens) + tokens = (c_int * n_tokens)(*tokens) + status = self.ctransformers_llm_batch_eval( + tokens, + n_tokens, + batch_size, + threads, + ) + if not status: + raise RuntimeError("Failed to evaluate tokens.") + + @doc + def sample( + self, + *, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + ) -> int: + """Samples a token from the model. + + Args: + {params} + + Returns: + The sampled token. + """ + config = self.config + top_k = get(top_k, config.top_k) + top_p = get(top_p, config.top_p) + temperature = get(temperature, config.temperature) + repetition_penalty = get(repetition_penalty, config.repetition_penalty) + last_n_tokens = get(last_n_tokens, config.last_n_tokens) + seed = get(seed, config.seed) + + return self.ctransformers_llm_sample( + top_k, + top_p, + temperature, + repetition_penalty, + last_n_tokens, + seed, + ) + + def reset(self) -> None: + """Resets the model state.""" + self.ctransformers_llm_reset() + + def __del__(self): + if self._llm is not None: + self.ctransformers_llm_delete() + + @doc + def generate( + self, + tokens: Sequence[int], + *, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + reset: Optional[bool] = None, + ) -> Generator[int, None, None]: + """Generates new tokens from a list of tokens. + + Args: + tokens: The list of tokens to generate tokens from. + {params} + + Returns: + The generated tokens. + """ + config = self.config + reset = get(reset, config.reset) + + if reset: + self.reset() + + self.eval(tokens, batch_size=batch_size, threads=threads) + while True: + token = self.sample( + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + ) + self.eval([token], batch_size=batch_size, threads=threads) + if self.is_eos_token(token): + break + yield token + + def _stream( + self, + prompt: str, + *, + max_new_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + stop: Optional[Sequence[str]] = None, + reset: Optional[bool] = None, + ) -> Generator[str, None, None]: + config = self.config + max_new_tokens = get(max_new_tokens, config.max_new_tokens) + stop = get(stop, config.stop) or [] + if isinstance(stop, str): + stop = [stop] + + tokens = self.tokenize(prompt) + + stop_regex = re.compile("|".join(map(re.escape, stop))) + count = 0 + text = "" + incomplete = b"" + for token in self.generate( + tokens, + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + batch_size=batch_size, + threads=threads, + reset=reset, + ): + # Handle incomplete UTF-8 multi-byte characters. + incomplete += self.detokenize([token], decode=False) + complete, incomplete = utf8_split_incomplete(incomplete) + text += complete.decode(errors="ignore") + + # https://github.com/abetlen/llama-cpp-python/blob/1a13d76c487df1c8560132d10bda62d6e2f4fa93/llama_cpp/llama.py#L686-L706 + # Check if one of the stop sequences is part of the text. + # Note that the stop sequence may not always be at the end of text. + if stop: + match = stop_regex.search(text) + if match: + text = text[: match.start()] + break + + # Avoid sending the longest suffix of text which is also a prefix + # of a stop sequence, as it can form a stop sequence with the text + # generated later. + longest = 0 + for s in stop: + for i in range(len(s), 0, -1): + if text.endswith(s[:i]): + longest = max(i, longest) + break + + end = len(text) - longest + if end > 0: + yield text[:end] + text = text[end:] + + count += 1 + if count >= max_new_tokens: + break + + if text: + yield text + + @doc + def __call__( + self, + prompt: str, + *, + max_new_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + stop: Optional[Sequence[str]] = None, + stream: Optional[bool] = None, + reset: Optional[bool] = None, + ) -> Union[str, Generator[str, None, None]]: + """Generates text from a prompt. + + Args: + prompt: The prompt to generate text from. + {params} + + Returns: + The generated text. + """ + config = self.config + stream = get(stream, config.stream) + + text = self._stream( + prompt, + max_new_tokens=max_new_tokens, + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + batch_size=batch_size, + threads=threads, + stop=stop, + reset=reset, + ) + if stream: + return text + return "".join(text) + + @doc + def embed( + self, + input: Union[str, Sequence[int]], + *, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + ) -> List[float]: + """Computes embeddings for a text or list of tokens. + + > **Note:** Currently only LLaMA models support embeddings. + + Args: + input: The input text or list of tokens to get embeddings for. + {params} + + Returns: + The input embeddings. + """ + if isinstance(input, str): + input = self.tokenize(input) + self.reset() + self.eval(input, batch_size=batch_size, threads=threads) + return list(self.embeddings) diff --git a/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/utils.py b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..894e4da0d1e29fbaad06e10554edc2e99e65126b --- /dev/null +++ b/ctransformers/_skbuild/win-amd64-3.11/setuptools/lib.win-amd64-cpython-311/ctransformers/utils.py @@ -0,0 +1,48 @@ +from collections.abc import MutableSequence +from typing import Any, Tuple + + +class Vector(MutableSequence): + """Provides a Python list-like interface for a C array to access and modify + data in-place without copying the entire data between C and Python. + """ + + def __init__(self, data: Any, size: int): + self._data = data + self._size = size + + def __getitem__(self, index: int) -> Any: + self._validate_index(index) + return self._data[index] + + def __setitem__(self, index: int, value: Any) -> None: + self._validate_index(index) + self._data[index] = value + + def __len__(self) -> int: + return self._size + + def _validate_index(self, index: int) -> None: + if not isinstance(index, int): + raise TypeError("list index must be integer") + if not 0 <= index < self._size: + raise IndexError("list index out of range") + + def __delitem__(self, index: int) -> None: + raise NotImplementedError("This operation is not allowed.") + + def insert(self, index: int, value: Any) -> None: + raise NotImplementedError("This operation is not allowed.") + + +def utf8_is_continuation_byte(byte: int) -> bool: + """Checks if a byte is a UTF-8 continuation byte (most significant bit is 1).""" + return (byte & 0b10000000) != 0 + + +def utf8_split_incomplete(seq: bytes) -> Tuple[bytes, bytes]: + """Splits a sequence of UTF-8 encoded bytes into complete and incomplete bytes.""" + i = len(seq) + while i > 0 and utf8_is_continuation_byte(seq[i - 1]): + i -= 1 + return seq[:i], seq[i:] diff --git a/ctransformers/ctransformers.egg-info/PKG-INFO b/ctransformers/ctransformers.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..dd15c48ccb5a865b5c5b941d9a58c5626999937b --- /dev/null +++ b/ctransformers/ctransformers.egg-info/PKG-INFO @@ -0,0 +1,520 @@ +Metadata-Version: 2.1 +Name: ctransformers +Version: 0.2.11 +Summary: Python bindings for the Transformer models implemented in C/C++ using GGML library. +Home-page: https://github.com/marella/ctransformers +Author: Ravindra Marella +Author-email: mv.ravindra007@gmail.com +License: MIT +Keywords: ctransformers transformers ai llm +Classifier: Development Status :: 1 - Planning +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Scientific/Engineering +Classifier: Topic :: Scientific/Engineering :: Mathematics +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Classifier: Topic :: Software Development +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Description-Content-Type: text/markdown +Provides-Extra: tests +License-File: LICENSE + +# [C Transformers](https://github.com/marella/ctransformers) [![PyPI](https://img.shields.io/pypi/v/ctransformers)](https://pypi.org/project/ctransformers/) [![tests](https://github.com/marella/ctransformers/actions/workflows/tests.yml/badge.svg)](https://github.com/marella/ctransformers/actions/workflows/tests.yml) [![build](https://github.com/marella/ctransformers/actions/workflows/build.yml/badge.svg)](https://github.com/marella/ctransformers/actions/workflows/build.yml) + +Python bindings for the Transformer models implemented in C/C++ using [GGML](https://github.com/ggerganov/ggml) library. + +> Also see [ChatDocs](https://github.com/marella/chatdocs) + +- [Supported Models](#supported-models) +- [Installation](#installation) +- [Usage](#usage) + - [Hugging Face Hub](#hugging-face-hub) + - [LangChain](#langchain) + - [GPU](#gpu) +- [Documentation](#documentation) +- [License](#license) + +## Supported Models + +| Models | Model Type | +| :-------------------- | ----------- | +| GPT-2 | `gpt2` | +| GPT-J, GPT4All-J | `gptj` | +| GPT-NeoX, StableLM | `gpt_neox` | +| LLaMA | `llama` | +| MPT | `mpt` | +| Dolly V2 | `dolly-v2` | +| StarCoder, StarChat | `starcoder` | +| Falcon (Experimental) | `falcon` | + +## Installation + +```sh +pip install ctransformers +``` + +For GPU (CUDA) support, set environment variable `CT_CUBLAS=1` and install from source using: + +```sh +CT_CUBLAS=1 pip install ctransformers --no-binary ctransformers +``` + +
+Show commands for Windows
+ +On Windows PowerShell run: + +```sh +$env:CT_CUBLAS=1 +pip install ctransformers --no-binary ctransformers +``` + +On Windows Command Prompt run: + +```sh +set CT_CUBLAS=1 +pip install ctransformers --no-binary ctransformers +``` + +
+ +## Usage + +It provides a unified interface for all models: + +```py +from ctransformers import AutoModelForCausalLM + +llm = AutoModelForCausalLM.from_pretrained('/path/to/ggml-gpt-2.bin', model_type='gpt2') + +print(llm('AI is going to')) +``` + +[Run in Google Colab](https://colab.research.google.com/drive/1GMhYMUAv_TyZkpfvUI1NirM8-9mCXQyL) + +If you are getting `illegal instruction` error, try using `lib='avx'` or `lib='basic'`: + +```py +llm = AutoModelForCausalLM.from_pretrained('/path/to/ggml-gpt-2.bin', model_type='gpt2', lib='avx') +``` + +It provides a generator interface for more control: + +```py +tokens = llm.tokenize('AI is going to') + +for token in llm.generate(tokens): + print(llm.detokenize(token)) +``` + +It can be used with a custom or Hugging Face tokenizer: + +```py +from transformers import AutoTokenizer + +tokenizer = AutoTokenizer.from_pretrained('gpt2') + +tokens = tokenizer.encode('AI is going to') + +for token in llm.generate(tokens): + print(tokenizer.decode(token)) +``` + +It also provides access to the low-level C API. See [Documentation](#documentation) section below. + +### Hugging Face Hub + +It can be used with models hosted on the Hub: + +```py +llm = AutoModelForCausalLM.from_pretrained('marella/gpt-2-ggml') +``` + +If a model repo has multiple model files (`.bin` files), specify a model file using: + +```py +llm = AutoModelForCausalLM.from_pretrained('marella/gpt-2-ggml', model_file='ggml-model.bin') +``` + +It can be used with your own models uploaded on the Hub. For better user experience, upload only one model per repo. + +To use it with your own model, add `config.json` file to your model repo specifying the `model_type`: + +```json +{ + "model_type": "gpt2" +} +``` + +You can also specify additional parameters under `task_specific_params.text-generation`. + +See [marella/gpt-2-ggml](https://huggingface.co/marella/gpt-2-ggml/blob/main/config.json) for a minimal example and [marella/gpt-2-ggml-example](https://huggingface.co/marella/gpt-2-ggml-example/blob/main/config.json) for a full example. + +### LangChain + +It is integrated into LangChain. See [LangChain docs](https://python.langchain.com/docs/ecosystem/integrations/ctransformers). + +### GPU + +> **Note:** Currently only LLaMA models have GPU support. + +To run some of the model layers on GPU, set the `gpu_layers` parameter: + +```py +llm = AutoModelForCausalLM.from_pretrained('/path/to/ggml-llama.bin', model_type='llama', gpu_layers=50) +``` + +[Run in Google Colab](https://colab.research.google.com/drive/1Ihn7iPCYiqlTotpkqa1tOhUIpJBrJ1Tp) + +## Documentation + + + +### Config + +| Parameter | Type | Description | Default | +| :------------------- | :---------- | :------------------------------------------------------- | :------ | +| `top_k` | `int` | The top-k value to use for sampling. | `40` | +| `top_p` | `float` | The top-p value to use for sampling. | `0.95` | +| `temperature` | `float` | The temperature to use for sampling. | `0.8` | +| `repetition_penalty` | `float` | The repetition penalty to use for sampling. | `1.1` | +| `last_n_tokens` | `int` | The number of last tokens to use for repetition penalty. | `64` | +| `seed` | `int` | The seed value to use for sampling tokens. | `-1` | +| `max_new_tokens` | `int` | The maximum number of new tokens to generate. | `256` | +| `stop` | `List[str]` | A list of sequences to stop generation when encountered. | `None` | +| `stream` | `bool` | Whether to stream the generated text. | `False` | +| `reset` | `bool` | Whether to reset the model state before generating text. | `True` | +| `batch_size` | `int` | The batch size to use for evaluating tokens. | `8` | +| `threads` | `int` | The number of threads to use for evaluating tokens. | `-1` | +| `context_length` | `int` | The maximum context length to use. | `-1` | +| `gpu_layers` | `int` | The number of layers to run on GPU. | `0` | + +> **Note:** Currently only LLaMA and MPT models support the `context_length` parameter and only LLaMA models support the `gpu_layers` parameter. + +### class `AutoModelForCausalLM` + +--- + +#### classmethod `AutoModelForCausalLM.from_pretrained` + +```python +from_pretrained( + model_path_or_repo_id: str, + model_type: Optional[str] = None, + model_file: Optional[str] = None, + config: Optional[ctransformers.hub.AutoConfig] = None, + lib: Optional[str] = None, + local_files_only: bool = False, + **kwargs +) → LLM +``` + +Loads the language model from a local file or remote repo. + +**Args:** + +- `model_path_or_repo_id`: The path to a model file or directory or the name of a Hugging Face Hub model repo. +- `model_type`: The model type. +- `model_file`: The name of the model file in repo or directory. +- `config`: `AutoConfig` object. +- `lib`: The path to a shared library or one of `avx2`, `avx`, `basic`. +- `local_files_only`: Whether or not to only look at local files (i.e., do not try to download the model). + +**Returns:** +`LLM` object. + +### class `LLM` + +### method `LLM.__init__` + +```python +__init__( + model_path: str, + model_type: str, + config: Optional[ctransformers.llm.Config] = None, + lib: Optional[str] = None +) +``` + +Loads the language model from a local file. + +**Args:** + +- `model_path`: The path to a model file. +- `model_type`: The model type. +- `config`: `Config` object. +- `lib`: The path to a shared library or one of `avx2`, `avx`, `basic`. + +--- + +##### property LLM.config + +The config object. + +--- + +##### property LLM.context_length + +The context length of model. + +--- + +##### property LLM.embeddings + +The input embeddings. + +--- + +##### property LLM.eos_token_id + +The end-of-sequence token. + +--- + +##### property LLM.logits + +The unnormalized log probabilities. + +--- + +##### property LLM.model_path + +The path to the model file. + +--- + +##### property LLM.model_type + +The model type. + +--- + +##### property LLM.vocab_size + +The number of tokens in vocabulary. + +--- + +#### method `LLM.detokenize` + +```python +detokenize(tokens: Sequence[int], decode: bool = True) → Union[str, bytes] +``` + +Converts a list of tokens to text. + +**Args:** + +- `tokens`: The list of tokens. +- `decode`: Whether to decode the text as UTF-8 string. + +**Returns:** +The combined text of all tokens. + +--- + +#### method `LLM.embed` + +```python +embed( + input: Union[str, Sequence[int]], + batch_size: Optional[int] = None, + threads: Optional[int] = None +) → List[float] +``` + +Computes embeddings for a text or list of tokens. + +> **Note:** Currently only LLaMA models support embeddings. + +**Args:** + +- `input`: The input text or list of tokens to get embeddings for. +- `batch_size`: The batch size to use for evaluating tokens. Default: `8` +- `threads`: The number of threads to use for evaluating tokens. Default: `-1` + +**Returns:** +The input embeddings. + +--- + +#### method `LLM.eval` + +```python +eval( + tokens: Sequence[int], + batch_size: Optional[int] = None, + threads: Optional[int] = None +) → None +``` + +Evaluates a list of tokens. + +**Args:** + +- `tokens`: The list of tokens to evaluate. +- `batch_size`: The batch size to use for evaluating tokens. Default: `8` +- `threads`: The number of threads to use for evaluating tokens. Default: `-1` + +--- + +#### method `LLM.generate` + +```python +generate( + tokens: Sequence[int], + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + reset: Optional[bool] = None +) → Generator[int, NoneType, NoneType] +``` + +Generates new tokens from a list of tokens. + +**Args:** + +- `tokens`: The list of tokens to generate tokens from. +- `top_k`: The top-k value to use for sampling. Default: `40` +- `top_p`: The top-p value to use for sampling. Default: `0.95` +- `temperature`: The temperature to use for sampling. Default: `0.8` +- `repetition_penalty`: The repetition penalty to use for sampling. Default: `1.1` +- `last_n_tokens`: The number of last tokens to use for repetition penalty. Default: `64` +- `seed`: The seed value to use for sampling tokens. Default: `-1` +- `batch_size`: The batch size to use for evaluating tokens. Default: `8` +- `threads`: The number of threads to use for evaluating tokens. Default: `-1` +- `reset`: Whether to reset the model state before generating text. Default: `True` + +**Returns:** +The generated tokens. + +--- + +#### method `LLM.is_eos_token` + +```python +is_eos_token(token: int) → bool +``` + +Checks if a token is an end-of-sequence token. + +**Args:** + +- `token`: The token to check. + +**Returns:** +`True` if the token is an end-of-sequence token else `False`. + +--- + +#### method `LLM.reset` + +```python +reset() → None +``` + +Resets the model state. + +--- + +#### method `LLM.sample` + +```python +sample( + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None +) → int +``` + +Samples a token from the model. + +**Args:** + +- `top_k`: The top-k value to use for sampling. Default: `40` +- `top_p`: The top-p value to use for sampling. Default: `0.95` +- `temperature`: The temperature to use for sampling. Default: `0.8` +- `repetition_penalty`: The repetition penalty to use for sampling. Default: `1.1` +- `last_n_tokens`: The number of last tokens to use for repetition penalty. Default: `64` +- `seed`: The seed value to use for sampling tokens. Default: `-1` + +**Returns:** +The sampled token. + +--- + +#### method `LLM.tokenize` + +```python +tokenize(text: str) → List[int] +``` + +Converts a text into list of tokens. + +**Args:** + +- `text`: The text to tokenize. + +**Returns:** +The list of tokens. + +--- + +#### method `LLM.__call__` + +```python +__call__( + prompt: str, + max_new_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + stop: Optional[Sequence[str]] = None, + stream: Optional[bool] = None, + reset: Optional[bool] = None +) → Union[str, Generator[str, NoneType, NoneType]] +``` + +Generates text from a prompt. + +**Args:** + +- `prompt`: The prompt to generate text from. +- `max_new_tokens`: The maximum number of new tokens to generate. Default: `256` +- `top_k`: The top-k value to use for sampling. Default: `40` +- `top_p`: The top-p value to use for sampling. Default: `0.95` +- `temperature`: The temperature to use for sampling. Default: `0.8` +- `repetition_penalty`: The repetition penalty to use for sampling. Default: `1.1` +- `last_n_tokens`: The number of last tokens to use for repetition penalty. Default: `64` +- `seed`: The seed value to use for sampling tokens. Default: `-1` +- `batch_size`: The batch size to use for evaluating tokens. Default: `8` +- `threads`: The number of threads to use for evaluating tokens. Default: `-1` +- `stop`: A list of sequences to stop generation when encountered. Default: `None` +- `stream`: Whether to stream the generated text. Default: `False` +- `reset`: Whether to reset the model state before generating text. Default: `True` + +**Returns:** +The generated text. + + + +## License + +[MIT](https://github.com/marella/ctransformers/blob/main/LICENSE) diff --git a/ctransformers/ctransformers.egg-info/SOURCES.txt b/ctransformers/ctransformers.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..67ddf7aa4f911c12cad5fa14bdf538285d9cd8ac --- /dev/null +++ b/ctransformers/ctransformers.egg-info/SOURCES.txt @@ -0,0 +1,43 @@ +LICENSE +README.md +pyproject.toml +setup.py +_skbuild/win-amd64-3.11/cmake-install/ctransformers/__init__.py +_skbuild/win-amd64-3.11/cmake-install/ctransformers/hub.py +_skbuild/win-amd64-3.11/cmake-install/ctransformers/langchain.py +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib.py +_skbuild/win-amd64-3.11/cmake-install/ctransformers/llm.py +_skbuild/win-amd64-3.11/cmake-install/ctransformers/utils.py +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/ctransformers.dll +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.dylib +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx/libctransformers.so +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/ctransformers.dll +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.dylib +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/avx2/libctransformers.so +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/ctransformers.dll +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.dylib +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/basic/libctransformers.so +_skbuild/win-amd64-3.11/cmake-install/ctransformers/lib/local/ctransformers.dll +ctransformers/__init__.py +ctransformers/hub.py +ctransformers/langchain.py +ctransformers/lib.py +ctransformers/llm.py +ctransformers/utils.py +ctransformers.egg-info/PKG-INFO +ctransformers.egg-info/SOURCES.txt +ctransformers.egg-info/dependency_links.txt +ctransformers.egg-info/not-zip-safe +ctransformers.egg-info/requires.txt +ctransformers.egg-info/top_level.txt +ctransformers/lib/avx/ctransformers.dll +ctransformers/lib/avx/libctransformers.dylib +ctransformers/lib/avx/libctransformers.so +ctransformers/lib/avx2/ctransformers.dll +ctransformers/lib/avx2/libctransformers.dylib +ctransformers/lib/avx2/libctransformers.so +ctransformers/lib/basic/ctransformers.dll +ctransformers/lib/basic/libctransformers.dylib +ctransformers/lib/basic/libctransformers.so +tests/test_llm.py +tests/test_model.py \ No newline at end of file diff --git a/ctransformers/ctransformers.egg-info/dependency_links.txt b/ctransformers/ctransformers.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/ctransformers/ctransformers.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/ctransformers/ctransformers.egg-info/not-zip-safe b/ctransformers/ctransformers.egg-info/not-zip-safe new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/ctransformers/ctransformers.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/ctransformers/ctransformers.egg-info/requires.txt b/ctransformers/ctransformers.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b0ba529096f0a5b2273f4018f3b843de57faec7 --- /dev/null +++ b/ctransformers/ctransformers.egg-info/requires.txt @@ -0,0 +1,4 @@ +huggingface-hub + +[tests] +pytest diff --git a/ctransformers/ctransformers.egg-info/top_level.txt b/ctransformers/ctransformers.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..b06a12c54335e9af172f8bf15e0dcdf8f008fecf --- /dev/null +++ b/ctransformers/ctransformers.egg-info/top_level.txt @@ -0,0 +1 @@ +ctransformers diff --git a/ctransformers/ctransformers/__init__.py b/ctransformers/ctransformers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..48e85bf19d65722700878f3f993350a8938b9d64 --- /dev/null +++ b/ctransformers/ctransformers/__init__.py @@ -0,0 +1,2 @@ +from .llm import Config, LLM +from .hub import AutoConfig, AutoModelForCausalLM diff --git a/ctransformers/ctransformers/hub.py b/ctransformers/ctransformers/hub.py new file mode 100644 index 0000000000000000000000000000000000000000..86e3129f4a9057a3447f76dd44f59a3772a3321d --- /dev/null +++ b/ctransformers/ctransformers/hub.py @@ -0,0 +1,215 @@ +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +from huggingface_hub import HfApi, snapshot_download +from huggingface_hub.utils import validate_repo_id, HFValidationError + +from .llm import Config, LLM + + +def get_path_type(path: str) -> Optional[str]: + p = Path(path) + if p.is_file(): + return "file" + elif p.is_dir(): + return "dir" + try: + validate_repo_id(path) + return "repo" + except HFValidationError: + pass + + +@dataclass +class AutoConfig: + config: Config + model_type: Optional[str] = None + + @classmethod + def from_pretrained( + cls, + model_path_or_repo_id: str, + local_files_only: bool = False, + **kwargs, + ) -> "AutoConfig": + path_type = get_path_type(model_path_or_repo_id) + if not path_type: + raise ValueError(f"Model path '{model_path_or_repo_id}' doesn't exist.") + + config = Config() + auto_config = AutoConfig(config=config) + + if path_type == "dir": + cls._update_from_dir(model_path_or_repo_id, auto_config) + elif path_type == "repo": + cls._update_from_repo( + model_path_or_repo_id, + auto_config, + local_files_only=local_files_only, + ) + + for k, v in kwargs.items(): + if not hasattr(config, k): + raise TypeError( + f"'{k}' is an invalid keyword argument for from_pretrained()" + ) + setattr(config, k, v) + + return auto_config + + @classmethod + def _update_from_repo( + cls, + repo_id: str, + auto_config: "AutoConfig", + local_files_only: bool, + ) -> None: + path = snapshot_download( + repo_id=repo_id, + allow_patterns="config.json", + local_files_only=local_files_only, + ) + cls._update_from_dir(path, auto_config) + + @classmethod + def _update_from_dir(cls, path: str, auto_config: "AutoConfig") -> None: + path = (Path(path) / "config.json").resolve() + if path.is_file(): + cls._update_from_file(path, auto_config) + + @classmethod + def _update_from_file(cls, path: str, auto_config: "AutoConfig") -> None: + with open(path) as f: + config = json.load(f) + + auto_config.model_type = config.get("model_type") + params = config.get("task_specific_params", {}) + params = params.get("text-generation", {}) + for name in [ + "top_k", + "top_p", + "temperature", + "repetition_penalty", + "last_n_tokens", + ]: + value = params.get(name) + if value is not None: + setattr(auto_config.config, name, value) + + +class AutoModelForCausalLM: + @classmethod + def from_pretrained( + cls, + model_path_or_repo_id: str, + *, + model_type: Optional[str] = None, + model_file: Optional[str] = None, + config: Optional[AutoConfig] = None, + lib: Optional[str] = None, + local_files_only: bool = False, + **kwargs, + ) -> LLM: + """Loads the language model from a local file or remote repo. + + Args: + model_path_or_repo_id: The path to a model file or directory or the + name of a Hugging Face Hub model repo. + model_type: The model type. + model_file: The name of the model file in repo or directory. + config: `AutoConfig` object. + lib: The path to a shared library or one of `avx2`, `avx`, `basic`. + local_files_only: Whether or not to only look at local files + (i.e., do not try to download the model). + + Returns: + `LLM` object. + """ + config = config or AutoConfig.from_pretrained( + model_path_or_repo_id, + local_files_only=local_files_only, + **kwargs, + ) + model_type = model_type or config.model_type + if not model_type: + raise ValueError( + "Unable to detect model type. Please specify a model type using:\n\n" + " AutoModelForCausalLM.from_pretrained(..., model_type='...')\n\n" + ) + + path_type = get_path_type(model_path_or_repo_id) + model_path = None + if path_type == "file": + model_path = model_path_or_repo_id + elif path_type == "dir": + model_path = cls._find_model_path_from_dir( + model_path_or_repo_id, model_file + ) + elif path_type == "repo": + model_path = cls._find_model_path_from_repo( + model_path_or_repo_id, + model_file, + local_files_only=local_files_only, + ) + + return LLM( + model_path=model_path, + model_type=model_type, + config=config.config, + lib=lib, + ) + + @classmethod + def _find_model_path_from_repo( + cls, + repo_id: str, + filename: Optional[str], + local_files_only: bool, + ) -> str: + if not filename and not local_files_only: + filename = cls._find_model_file_from_repo(repo_id=repo_id) + allow_patterns = filename or "*.bin" + path = snapshot_download( + repo_id=repo_id, + allow_patterns=allow_patterns, + local_files_only=local_files_only, + ) + return cls._find_model_path_from_dir(path, filename=filename) + + @classmethod + def _find_model_file_from_repo(cls, repo_id: str) -> Optional[str]: + api = HfApi() + repo_info = api.repo_info(repo_id=repo_id, files_metadata=True) + files = [ + (f.size, f.rfilename) + for f in repo_info.siblings + if f.rfilename.endswith(".bin") + ] + if not files: + raise ValueError(f"No model file found in repo '{repo_id}'") + return min(files)[1] + + @classmethod + def _find_model_path_from_dir( + cls, + path: str, + filename: Optional[str] = None, + ) -> str: + path = Path(path).resolve() + if filename: + file = (path / filename).resolve() + if not file.is_file(): + raise ValueError(f"Model file '{filename}' not found in '{path}'") + return str(file) + + files = [ + (f.stat().st_size, f) + for f in path.iterdir() + if f.is_file() and f.name.endswith(".bin") + ] + if not files: + raise ValueError(f"No model file found in directory '{path}'") + file = min(files)[1] + return str(file.resolve()) diff --git a/ctransformers/ctransformers/langchain.py b/ctransformers/ctransformers/langchain.py new file mode 100644 index 0000000000000000000000000000000000000000..eadb57b94089c2061776514e73fcabb8a3a151fc --- /dev/null +++ b/ctransformers/ctransformers/langchain.py @@ -0,0 +1,89 @@ +try: + from langchain.llms.base import LLM +except ImportError: + raise ImportError( + "To use the ctransformers.langchain module, please install the " + "`langchain` python package: `pip install langchain`" + ) + +from typing import Any, Dict, Optional, Sequence + +from pydantic import root_validator +from langchain.callbacks.manager import CallbackManagerForLLMRun + +from ctransformers import AutoModelForCausalLM + + +class CTransformers(LLM): + """Wrapper around the C Transformers LLM interface. + + To use, you should have the `langchain` python package installed. + """ + + client: Any #: :meta private: + + model: str + """The path to a model file or directory or the name of a Hugging Face Hub + model repo.""" + + model_type: Optional[str] = None + """The model type.""" + + model_file: Optional[str] = None + """The name of the model file in repo or directory.""" + + config: Optional[Dict[str, Any]] = None + """The config parameters.""" + + lib: Optional[Any] = None + """The path to a shared library or one of `avx2`, `avx`, `basic`.""" + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Get the identifying parameters.""" + return { + "model": self.model, + "model_type": self.model_type, + "model_file": self.model_file, + "config": self.config, + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "ctransformers" + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate and load model from a local file or remote repo.""" + config = values["config"] or {} + values["client"] = AutoModelForCausalLM.from_pretrained( + values["model"], + model_type=values["model_type"], + model_file=values["model_file"], + lib=values["lib"], + **config, + ) + return values + + def _call( + self, + prompt: str, + stop: Optional[Sequence[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + ) -> str: + """Generate text from a prompt. + + Args: + prompt: The prompt to generate text from. + stop: A list of sequences to stop generation when encountered. + + Returns: + The generated text. + """ + text = [] + for chunk in self.client(prompt, stop=stop, stream=True): + text.append(chunk) + if run_manager: + run_manager.on_llm_new_token(chunk, verbose=self.verbose) + return "".join(text) diff --git a/ctransformers/ctransformers/lib.py b/ctransformers/ctransformers/lib.py new file mode 100644 index 0000000000000000000000000000000000000000..c241453409dacdab7ad2e43304083a22760db01b --- /dev/null +++ b/ctransformers/ctransformers/lib.py @@ -0,0 +1,42 @@ +from typing import Optional +import platform +from pathlib import Path + + +def find_library(path: Optional[str] = None) -> str: + lib_directory = Path(__file__).parent.resolve() / "lib" + + if path: + subdirs = [d.name for d in lib_directory.iterdir() if d.is_dir()] + if path not in subdirs: + return path + + system = platform.system() + if not path: + if (lib_directory / "local").is_dir(): + path = "local" + elif platform.processor() == "arm": + # Apple silicon doesn't support AVX/AVX2. + path = "basic" if system == "Darwin" else "" + else: + path = "avx2" + + name = "ctransformers" + if system == "Linux": + name = f"lib{name}.so" + elif system == "Windows": + name = f"{name}.dll" + elif system == "Darwin": + name = f"lib{name}.dylib" + else: + name = "" + + path = lib_directory / path / name + if not path.is_file(): + raise OSError( + "Precompiled binaries are not available for the current platform. " + "Please reinstall from source using:\n\n" + " pip uninstall ctransformers --yes\n" + " pip install ctransformers --no-binary ctransformers\n\n" + ) + return str(path) diff --git a/ctransformers/ctransformers/lib/avx/ctransformers.dll b/ctransformers/ctransformers/lib/avx/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..cc7ed9bb6d6cb69146e15b0a51ea6807eae9974a Binary files /dev/null and b/ctransformers/ctransformers/lib/avx/ctransformers.dll differ diff --git a/ctransformers/ctransformers/lib/avx/libctransformers.dylib b/ctransformers/ctransformers/lib/avx/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..3d9a5d6046f2a50ca4b5e54721ee0bf9e4e8e29f --- /dev/null +++ b/ctransformers/ctransformers/lib/avx/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed063a9cd34374aec9c943b3330aaf476d40a116220fc49c871e50f868d024f +size 1797491 diff --git a/ctransformers/ctransformers/lib/avx/libctransformers.so b/ctransformers/ctransformers/lib/avx/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..b2a8cf458bc04e9abe82a1ff5c1eefba5e88bce4 --- /dev/null +++ b/ctransformers/ctransformers/lib/avx/libctransformers.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb70a475777b52627511001d4fa2d2ea78d47ca9cb66920c42e31a80859b82e +size 1038856 diff --git a/ctransformers/ctransformers/lib/avx2/ctransformers.dll b/ctransformers/ctransformers/lib/avx2/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..280f4ee2c62d0c0cd8124382b4c4684494c749f1 Binary files /dev/null and b/ctransformers/ctransformers/lib/avx2/ctransformers.dll differ diff --git a/ctransformers/ctransformers/lib/avx2/libctransformers.dylib b/ctransformers/ctransformers/lib/avx2/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..032ad8ddb4e43e46528c587bce491603dbddf9bd --- /dev/null +++ b/ctransformers/ctransformers/lib/avx2/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4197c6330368c2c55e8eaf802440a94232ccd59370925edc2fb1ee086acc8258 +size 1781107 diff --git a/ctransformers/ctransformers/lib/avx2/libctransformers.so b/ctransformers/ctransformers/lib/avx2/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..60d3f48374116284b207d26773d6b28f789737f0 --- /dev/null +++ b/ctransformers/ctransformers/lib/avx2/libctransformers.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56cd4a044bab0ca83e3474be8b5a3042a6a25b57916506fefdc5a8601305a6c5 +size 1042952 diff --git a/ctransformers/ctransformers/lib/basic/ctransformers.dll b/ctransformers/ctransformers/lib/basic/ctransformers.dll new file mode 100644 index 0000000000000000000000000000000000000000..441d9acc4c6bd05af18a9b8bf881e3fdd1083ed8 Binary files /dev/null and b/ctransformers/ctransformers/lib/basic/ctransformers.dll differ diff --git a/ctransformers/ctransformers/lib/basic/libctransformers.dylib b/ctransformers/ctransformers/lib/basic/libctransformers.dylib new file mode 100644 index 0000000000000000000000000000000000000000..93dc0177132d6e591a70824bed794c276e7687fd --- /dev/null +++ b/ctransformers/ctransformers/lib/basic/libctransformers.dylib @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a347ed7fd0b513e14d2d9c42a93b53f74cfb328c7b51add8e60a66a8f8c2839e +size 1781107 diff --git a/ctransformers/ctransformers/lib/basic/libctransformers.so b/ctransformers/ctransformers/lib/basic/libctransformers.so new file mode 100644 index 0000000000000000000000000000000000000000..4109aa1b28665fc325f8a2dbb1c7fded2c7a289c Binary files /dev/null and b/ctransformers/ctransformers/lib/basic/libctransformers.so differ diff --git a/ctransformers/ctransformers/llm.py b/ctransformers/ctransformers/llm.py new file mode 100644 index 0000000000000000000000000000000000000000..aa4d9e0b7fdc0141dfe8cfe3f77836ee6a47d6b3 --- /dev/null +++ b/ctransformers/ctransformers/llm.py @@ -0,0 +1,584 @@ +import inspect +import os +import re +from collections import OrderedDict +from dataclasses import dataclass +from functools import partial +from pathlib import Path +from ctypes import ( + CDLL, + c_bool, + c_int, + c_float, + c_char_p, + c_void_p, + POINTER, +) +from typing import ( + Any, + Callable, + Generator, + List, + Optional, + Sequence, + Union, +) + +from .lib import find_library +from .utils import Vector, utf8_split_incomplete + +c_int_p = POINTER(c_int) +c_float_p = POINTER(c_float) +llm_p = c_void_p + + +@dataclass +class Config: + # sample + top_k: int = 40 + top_p: float = 0.95 + temperature: float = 0.8 + repetition_penalty: float = 1.1 + last_n_tokens: int = 64 + seed: int = -1 + + # eval + batch_size: int = 8 + threads: int = -1 + + # generate + max_new_tokens: int = 256 + stop: Optional[Sequence[str]] = None + stream: bool = False + reset: bool = True + + # model + context_length: int = -1 + gpu_layers: int = 0 + + +docs = OrderedDict( + top_k="The top-k value to use for sampling.", + top_p="The top-p value to use for sampling.", + temperature="The temperature to use for sampling.", + repetition_penalty="The repetition penalty to use for sampling.", + last_n_tokens="The number of last tokens to use for repetition penalty.", + seed="The seed value to use for sampling tokens.", + max_new_tokens="The maximum number of new tokens to generate.", + stop="A list of sequences to stop generation when encountered.", + stream="Whether to stream the generated text.", + reset="Whether to reset the model state before generating text.", + batch_size="The batch size to use for evaluating tokens.", + threads="The number of threads to use for evaluating tokens.", + context_length="The maximum context length to use.", + gpu_layers="The number of layers to run on GPU.", +) + + +def doc(fn): + doc = [] + for param in inspect.signature(fn).parameters: + if param in docs: + default = getattr(Config, param) + doc.append(f"{param}: {docs[param]} Default: `{default}`") + doc = ("\n" + " " * 12).join(doc) + fn.__doc__ = fn.__doc__.format(params=doc) + return fn + + +def get(*values): + for value in values: + if value is not None: + return value + + +def load_library(path: Optional[str] = None) -> Any: + # https://docs.python.org/3.8/whatsnew/3.8.html#bpo-36085-whatsnew + # https://github.com/abetlen/llama-cpp-python/pull/225 + if hasattr(os, "add_dll_directory") and "CUDA_PATH" in os.environ: + os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin")) + + path = find_library(path) + lib = CDLL(path) + + lib.ctransformers_llm_create.argtypes = [ + c_char_p, # model_path + c_char_p, # model_type + c_int, # context_length + c_int, # gpu_layers + ] + lib.ctransformers_llm_create.restype = llm_p + + lib.ctransformers_llm_delete.argtypes = [llm_p] + lib.ctransformers_llm_delete.restype = None + + lib.ctransformers_llm_tokenize.argtypes = [ + llm_p, + c_char_p, # text + c_int_p, # output + ] + lib.ctransformers_llm_tokenize.restype = c_int + + lib.ctransformers_llm_detokenize.argtypes = [ + llm_p, + c_int, # token + ] + lib.ctransformers_llm_detokenize.restype = c_char_p + + lib.ctransformers_llm_is_eos_token.argtypes = [ + llm_p, + c_int, # token + ] + lib.ctransformers_llm_is_eos_token.restype = c_bool + + lib.ctransformers_llm_eos_token_id.argtypes = [llm_p] + lib.ctransformers_llm_eos_token_id.restype = c_int + + lib.ctransformers_llm_vocab_size.argtypes = [llm_p] + lib.ctransformers_llm_vocab_size.restype = c_int + + lib.ctransformers_llm_context_length.argtypes = [llm_p] + lib.ctransformers_llm_context_length.restype = c_int + + lib.ctransformers_llm_batch_eval.argtypes = [ + llm_p, + c_int_p, # tokens + c_int, # n_tokens + c_int, # batch_size + c_int, # threads + ] + lib.ctransformers_llm_batch_eval.restype = c_bool + + lib.ctransformers_llm_logits_data.argtypes = [llm_p] + lib.ctransformers_llm_logits_data.restype = c_float_p + lib.ctransformers_llm_logits_size.argtypes = [llm_p] + lib.ctransformers_llm_logits_size.restype = c_int + + lib.ctransformers_llm_embeddings_data.argtypes = [llm_p] + lib.ctransformers_llm_embeddings_data.restype = c_float_p + lib.ctransformers_llm_embeddings_size.argtypes = [llm_p] + lib.ctransformers_llm_embeddings_size.restype = c_int + + lib.ctransformers_llm_sample.argtypes = [ + llm_p, + c_int, # top_k + c_float, # top_p + c_float, # temperature + c_float, # repetition_penalty + c_int, # last_n_tokens + c_int, # seed + ] + lib.ctransformers_llm_sample.restype = c_int + + lib.ctransformers_llm_reset.argtypes = [llm_p] + lib.ctransformers_llm_reset.restype = None + + return lib + + +class LLM: + def __init__( + self, + model_path: str, + model_type: str, + *, + config: Optional[Config] = None, + lib: Optional[str] = None, + ): + """Loads the language model from a local file. + + Args: + model_path: The path to a model file. + model_type: The model type. + config: `Config` object. + lib: The path to a shared library or one of `avx2`, `avx`, `basic`. + """ + config = config or Config() + self._model_path = model_path + self._model_type = model_type + self._config = config + self._llm = None + self._lib = None + + if not Path(model_path).is_file(): + raise ValueError(f"Model path '{model_path}' doesn't exist.") + + self._lib = load_library(lib) + self._llm = self._lib.ctransformers_llm_create( + model_path.encode(), + model_type.encode(), + config.context_length, + config.gpu_layers, + ) + if self._llm is None: + raise RuntimeError( + f"Failed to create LLM '{model_type}' from '{model_path}'." + ) + + @property + def model_path(self) -> str: + """The path to the model file.""" + return self._model_path + + @property + def model_type(self) -> str: + """The model type.""" + return self._model_type + + @property + def config(self) -> Config: + """The config object.""" + return self._config + + @property + def eos_token_id(self) -> int: + """The end-of-sequence token.""" + return self.ctransformers_llm_eos_token_id() + + @property + def vocab_size(self) -> int: + """The number of tokens in vocabulary.""" + return self.ctransformers_llm_vocab_size() + + @property + def context_length(self) -> int: + """The context length of model.""" + return self.ctransformers_llm_context_length() + + @property + def logits(self) -> List[float]: + """The unnormalized log probabilities.""" + return Vector( + self.ctransformers_llm_logits_data(), + self.ctransformers_llm_logits_size(), + ) + + @property + def embeddings(self) -> List[float]: + """The input embeddings.""" + return Vector( + self.ctransformers_llm_embeddings_data(), + self.ctransformers_llm_embeddings_size(), + ) + + def __getattr__(self, name: str) -> Callable: + lib, llm = self._lib, self._llm + if name.startswith("ctransformers_llm_") and hasattr(lib, name): + return partial(getattr(lib, name), llm) + raise AttributeError(f"'LLM' object has no attribute '{name}'") + + def tokenize(self, text: str) -> List[int]: + """Converts a text into list of tokens. + + Args: + text: The text to tokenize. + + Returns: + The list of tokens. + """ + tokens = (c_int * (len(text) + 1))() + n_tokens = self.ctransformers_llm_tokenize(text.encode(), tokens) + return tokens[:n_tokens] + + def detokenize( + self, + tokens: Sequence[int], + decode: bool = True, + ) -> Union[str, bytes]: + """Converts a list of tokens to text. + + Args: + tokens: The list of tokens. + decode: Whether to decode the text as UTF-8 string. + + Returns: + The combined text of all tokens. + """ + if isinstance(tokens, int): + tokens = [tokens] + texts = [] + for token in tokens: + text = self.ctransformers_llm_detokenize(token) + texts.append(text) + texts = b"".join(texts) + if decode: + texts = texts.decode(errors="ignore") + return texts + + def is_eos_token(self, token: int) -> bool: + """Checks if a token is an end-of-sequence token. + + Args: + token: The token to check. + + Returns: + `True` if the token is an end-of-sequence token else `False`. + """ + return self.ctransformers_llm_is_eos_token(token) + + @doc + def eval( + self, + tokens: Sequence[int], + *, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + ) -> None: + """Evaluates a list of tokens. + + Args: + tokens: The list of tokens to evaluate. + {params} + """ + config = self.config + batch_size = get(batch_size, config.batch_size) + threads = get(threads, config.threads) + + n_tokens = len(tokens) + tokens = (c_int * n_tokens)(*tokens) + status = self.ctransformers_llm_batch_eval( + tokens, + n_tokens, + batch_size, + threads, + ) + if not status: + raise RuntimeError("Failed to evaluate tokens.") + + @doc + def sample( + self, + *, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + ) -> int: + """Samples a token from the model. + + Args: + {params} + + Returns: + The sampled token. + """ + config = self.config + top_k = get(top_k, config.top_k) + top_p = get(top_p, config.top_p) + temperature = get(temperature, config.temperature) + repetition_penalty = get(repetition_penalty, config.repetition_penalty) + last_n_tokens = get(last_n_tokens, config.last_n_tokens) + seed = get(seed, config.seed) + + return self.ctransformers_llm_sample( + top_k, + top_p, + temperature, + repetition_penalty, + last_n_tokens, + seed, + ) + + def reset(self) -> None: + """Resets the model state.""" + self.ctransformers_llm_reset() + + def __del__(self): + if self._llm is not None: + self.ctransformers_llm_delete() + + @doc + def generate( + self, + tokens: Sequence[int], + *, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + reset: Optional[bool] = None, + ) -> Generator[int, None, None]: + """Generates new tokens from a list of tokens. + + Args: + tokens: The list of tokens to generate tokens from. + {params} + + Returns: + The generated tokens. + """ + config = self.config + reset = get(reset, config.reset) + + if reset: + self.reset() + + self.eval(tokens, batch_size=batch_size, threads=threads) + while True: + token = self.sample( + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + ) + self.eval([token], batch_size=batch_size, threads=threads) + if self.is_eos_token(token): + break + yield token + + def _stream( + self, + prompt: str, + *, + max_new_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + stop: Optional[Sequence[str]] = None, + reset: Optional[bool] = None, + ) -> Generator[str, None, None]: + config = self.config + max_new_tokens = get(max_new_tokens, config.max_new_tokens) + stop = get(stop, config.stop) or [] + if isinstance(stop, str): + stop = [stop] + + tokens = self.tokenize(prompt) + + stop_regex = re.compile("|".join(map(re.escape, stop))) + count = 0 + text = "" + incomplete = b"" + for token in self.generate( + tokens, + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + batch_size=batch_size, + threads=threads, + reset=reset, + ): + # Handle incomplete UTF-8 multi-byte characters. + incomplete += self.detokenize([token], decode=False) + complete, incomplete = utf8_split_incomplete(incomplete) + text += complete.decode(errors="ignore") + + # https://github.com/abetlen/llama-cpp-python/blob/1a13d76c487df1c8560132d10bda62d6e2f4fa93/llama_cpp/llama.py#L686-L706 + # Check if one of the stop sequences is part of the text. + # Note that the stop sequence may not always be at the end of text. + if stop: + match = stop_regex.search(text) + if match: + text = text[: match.start()] + break + + # Avoid sending the longest suffix of text which is also a prefix + # of a stop sequence, as it can form a stop sequence with the text + # generated later. + longest = 0 + for s in stop: + for i in range(len(s), 0, -1): + if text.endswith(s[:i]): + longest = max(i, longest) + break + + end = len(text) - longest + if end > 0: + yield text[:end] + text = text[end:] + + count += 1 + if count >= max_new_tokens: + break + + if text: + yield text + + @doc + def __call__( + self, + prompt: str, + *, + max_new_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + temperature: Optional[float] = None, + repetition_penalty: Optional[float] = None, + last_n_tokens: Optional[int] = None, + seed: Optional[int] = None, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + stop: Optional[Sequence[str]] = None, + stream: Optional[bool] = None, + reset: Optional[bool] = None, + ) -> Union[str, Generator[str, None, None]]: + """Generates text from a prompt. + + Args: + prompt: The prompt to generate text from. + {params} + + Returns: + The generated text. + """ + config = self.config + stream = get(stream, config.stream) + + text = self._stream( + prompt, + max_new_tokens=max_new_tokens, + top_k=top_k, + top_p=top_p, + temperature=temperature, + repetition_penalty=repetition_penalty, + last_n_tokens=last_n_tokens, + seed=seed, + batch_size=batch_size, + threads=threads, + stop=stop, + reset=reset, + ) + if stream: + return text + return "".join(text) + + @doc + def embed( + self, + input: Union[str, Sequence[int]], + *, + batch_size: Optional[int] = None, + threads: Optional[int] = None, + ) -> List[float]: + """Computes embeddings for a text or list of tokens. + + > **Note:** Currently only LLaMA models support embeddings. + + Args: + input: The input text or list of tokens to get embeddings for. + {params} + + Returns: + The input embeddings. + """ + if isinstance(input, str): + input = self.tokenize(input) + self.reset() + self.eval(input, batch_size=batch_size, threads=threads) + return list(self.embeddings) diff --git a/ctransformers/ctransformers/utils.py b/ctransformers/ctransformers/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..894e4da0d1e29fbaad06e10554edc2e99e65126b --- /dev/null +++ b/ctransformers/ctransformers/utils.py @@ -0,0 +1,48 @@ +from collections.abc import MutableSequence +from typing import Any, Tuple + + +class Vector(MutableSequence): + """Provides a Python list-like interface for a C array to access and modify + data in-place without copying the entire data between C and Python. + """ + + def __init__(self, data: Any, size: int): + self._data = data + self._size = size + + def __getitem__(self, index: int) -> Any: + self._validate_index(index) + return self._data[index] + + def __setitem__(self, index: int, value: Any) -> None: + self._validate_index(index) + self._data[index] = value + + def __len__(self) -> int: + return self._size + + def _validate_index(self, index: int) -> None: + if not isinstance(index, int): + raise TypeError("list index must be integer") + if not 0 <= index < self._size: + raise IndexError("list index out of range") + + def __delitem__(self, index: int) -> None: + raise NotImplementedError("This operation is not allowed.") + + def insert(self, index: int, value: Any) -> None: + raise NotImplementedError("This operation is not allowed.") + + +def utf8_is_continuation_byte(byte: int) -> bool: + """Checks if a byte is a UTF-8 continuation byte (most significant bit is 1).""" + return (byte & 0b10000000) != 0 + + +def utf8_split_incomplete(seq: bytes) -> Tuple[bytes, bytes]: + """Splits a sequence of UTF-8 encoded bytes into complete and incomplete bytes.""" + i = len(seq) + while i > 0 and utf8_is_continuation_byte(seq[i - 1]): + i -= 1 + return seq[:i], seq[i:] diff --git a/ctransformers/models/common.h b/ctransformers/models/common.h new file mode 100644 index 0000000000000000000000000000000000000000..29f8a2fdb439715f0716731f4f8c1eb543d6fc5f --- /dev/null +++ b/ctransformers/models/common.h @@ -0,0 +1,205 @@ +#ifndef CTRANSFORMERS_MODELS_COMMON_H_ +#define CTRANSFORMERS_MODELS_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ggml/ggml.h" + +// https://github.com/ggerganov/ggml/blob/master/examples/common.cpp + +struct gpt_vocab { + using id = int32_t; + using token = std::string; + + std::map token_to_id; + std::map id_to_token; + std::vector special_tokens; + + void add_special_token(const std::string &token) { + special_tokens.push_back(token); + } +}; + +std::string convert_to_utf8(const std::wstring &input) { + std::wstring_convert> converter; + return converter.to_bytes(input); +} + +std::wstring convert_to_wstring(const std::string &input) { + std::wstring_convert> converter; + return converter.from_bytes(input); +} + +void gpt_split_words(std::string str, std::vector &words) { + const std::string pattern = + R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"; + const std::regex re(pattern); + std::smatch m; + + while (std::regex_search(str, m, re)) { + for (auto x : m) { + words.push_back(x); + } + str = m.suffix(); + } +} + +std::vector gpt_tokenize(const gpt_vocab &vocab, + const std::string &text) { + std::vector words; + + // first split the text into words + { + std::string str = text; + + // Generate the subpattern from the special_tokens vector if it's not empty + if (!vocab.special_tokens.empty()) { + const std::regex escape(R"([\[\\\^\$\.\|\?\*\+\(\)\{\}])"); + std::string special_tokens_subpattern; + for (const auto &token : vocab.special_tokens) { + if (!special_tokens_subpattern.empty()) { + special_tokens_subpattern += "|"; + } + special_tokens_subpattern += + std::regex_replace(token, escape, R"(\$&)"); + } + + std::regex re(special_tokens_subpattern); + std::smatch m; + // Split the text by special tokens. + while (std::regex_search(str, m, re)) { + // Split the substrings in-between special tokens into words. + gpt_split_words(m.prefix(), words); + // Add matched special tokens as words. + for (auto x : m) { + words.push_back(x); + } + str = m.suffix(); + } + // Remaining text without special tokens will be handled below. + } + + gpt_split_words(str, words); + } + + // find the longest token that forms each word in words: + std::vector tokens; + for (const auto &word : words) { + for (int i = 0; i < (int)word.size();) { + for (int j = word.size() - 1; j >= i; j--) { + auto cand = word.substr(i, j - i + 1); + auto it = vocab.token_to_id.find(cand); + if (it != vocab.token_to_id.end()) { // word.substr(i, j-i+1) in vocab + tokens.push_back(it->second); + i = j + 1; + break; + } else if (j == i) { // word.substr(i, 1) has no matching + fprintf(stderr, "%s: unknown token '%s'\n", __func__, + word.substr(i, 1).data()); + i++; + } + } + } + } + + return tokens; +} + +gpt_vocab::id gpt_sample_top_k_top_p( + const gpt_vocab &vocab, const float *logits, int top_k, double top_p, + double temp, const float repetition_penalty, + const std::unordered_set &recent_tokens, std::mt19937 &rng) { + int n_logits = vocab.id_to_token.size(); + + std::vector> logits_id; + logits_id.reserve(n_logits); + + { + const double scale = 1.0 / temp; + for (int i = 0; i < n_logits; ++i) { + logits_id.push_back(std::make_pair(logits[i] * scale, i)); + } + } + + for (const gpt_vocab::id token : recent_tokens) { + // https://github.com/ggerganov/llama.cpp/blob/3e5aa8a1c44051153d6d7b3eeca2f4b4e5fb310c/llama.cpp#L1690-L1717 + // https://github.com/ggerganov/llama.cpp/blob/3e5aa8a1c44051153d6d7b3eeca2f4b4e5fb310c/examples/main/main.cpp#L432-L434 + double &logit = logits_id[token].first; + if (logit <= 0) { + logit *= repetition_penalty; + } else { + logit /= repetition_penalty; + } + } + + // find the top K tokens + std::partial_sort(logits_id.begin(), logits_id.begin() + top_k, + logits_id.end(), + [](const std::pair &a, + const std::pair &b) { + return a.first > b.first; + }); + + logits_id.resize(top_k); + + double maxl = -INFINITY; + for (const auto &kv : logits_id) { + maxl = std::max(maxl, kv.first); + } + + // compute probs for the top K tokens + std::vector probs; + probs.reserve(logits_id.size()); + + double sum = 0.0; + for (const auto &kv : logits_id) { + double p = exp(kv.first - maxl); + probs.push_back(p); + sum += p; + } + + // normalize the probs + for (auto &p : probs) { + p /= sum; + } + + if (top_p < 1.0f) { + double cumsum = 0.0f; + for (int i = 0; i < top_k; i++) { + cumsum += probs[i]; + if (cumsum >= top_p) { + top_k = i + 1; + probs.resize(top_k); + logits_id.resize(top_k); + break; + } + } + + cumsum = 1.0 / cumsum; + for (int i = 0; i < (int)probs.size(); i++) { + probs[i] *= cumsum; + } + } + + std::discrete_distribution<> dist(probs.begin(), probs.end()); + int idx = dist(rng); + + return logits_id[idx].second; +} + +#endif diff --git a/ctransformers/models/ggml/ggml-cuda.cu b/ctransformers/models/ggml/ggml-cuda.cu new file mode 100644 index 0000000000000000000000000000000000000000..5d584bd435ccd12935ece031c3378cb554b51046 --- /dev/null +++ b/ctransformers/models/ggml/ggml-cuda.cu @@ -0,0 +1,2628 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ggml-cuda.h" +#include "ggml.h" + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size"); + +#define CUDA_CHECK(err) \ + do { \ + cudaError_t err_ = (err); \ + if (err_ != cudaSuccess) { \ + fprintf(stderr, "CUDA error %d at %s:%d: %s\n", err_, __FILE__, __LINE__, \ + cudaGetErrorString(err_)); \ + exit(1); \ + } \ + } while (0) + +#if CUDART_VERSION >= 12000 +#define CUBLAS_CHECK(err) \ + do { \ + cublasStatus_t err_ = (err); \ + if (err_ != CUBLAS_STATUS_SUCCESS) { \ + fprintf(stderr, "\ncuBLAS error %d at %s:%d: %s\n", \ + err_, __FILE__, __LINE__, cublasGetStatusString(err_)); \ + exit(1); \ + } \ + } while (0) +#else +#define CUBLAS_CHECK(err) \ + do { \ + cublasStatus_t err_ = (err); \ + if (err_ != CUBLAS_STATUS_SUCCESS) { \ + fprintf(stderr, "\ncuBLAS error %d at %s:%d\n", err_, __FILE__, __LINE__); \ + exit(1); \ + } \ + } while (0) +#endif // CUDART_VERSION >= 11 + +typedef void (*dequantize_kernel_t)(const void * vx, const int ib, const int iqs, float & v0, float & v1); +typedef void (*to_fp32_cuda_t)(const void * x, float * y, int k, cudaStream_t stream); +typedef void (*dot_kernel_k_t)(const void * vx, const int ib, const int iqs, const float * y, float & v); +typedef void (*cpy_kernel_t)(const char * cx, char * cdst); +typedef void (*ggml_cuda_func_t)(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); +typedef void (*ggml_cuda_op_t)( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, float * src0_ddf_i, + float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main); + +// QK = number of values after dequantization +// QR = QK / number of values before dequantization + +#define QK4_0 32 +#define QR4_0 2 +typedef struct { + half d; // delta + uint8_t qs[QK4_0 / 2]; // nibbles / quants +} block_q4_0; +static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding"); + +#define QK4_1 32 +#define QR4_1 2 +typedef struct { + half d; // delta + half m; // min + uint8_t qs[QK4_1 / 2]; // nibbles / quants +} block_q4_1; +static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding"); + +#define QK5_0 32 +#define QR5_0 2 +typedef struct { + half d; // delta + uint8_t qh[4]; // 5-th bit of quants + uint8_t qs[QK5_0 / 2]; // nibbles / quants +} block_q5_0; +static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding"); + +#define QK5_1 32 +#define QR5_1 2 +typedef struct { + half d; // delta + half m; // min + uint8_t qh[4]; // 5-th bit of quants + uint8_t qs[QK5_1 / 2]; // nibbles / quants +} block_q5_1; +static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding"); + +#define QK8_0 32 +#define QR8_0 1 +typedef struct { + half d; // delta + int8_t qs[QK8_0]; // quants +} block_q8_0; +static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding"); + +//================================= k-quants + +#define QK_K 256 + +typedef struct { + uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits + uint8_t qs[QK_K/4]; // quants + half d; // super-block scale for quantized scales + half dmin; // super-block scale for quantized mins +} block_q2_K; +static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding"); + +typedef struct { + uint8_t hmask[QK_K/8]; + uint8_t qs[QK_K/4]; // nibbles / quants + uint8_t scales[3*QK_K/64]; + half d; +} block_q3_K; +static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + 11 * QK_K / 64, "wrong q3_K block size/padding"); + +typedef struct { + half d; // super-block scale for quantized scales + half dmin; // super-block scale for quantized mins + uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits + uint8_t qs[QK_K/2]; // 4--bit quants +} block_q4_K; +static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding"); + +typedef struct { + half d; // super-block scale for quantized scales + half dmin; // super-block scale for quantized mins + uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits + uint8_t qh[QK_K/8]; // quants, high bit + uint8_t qs[QK_K/2]; // quants, low 4 bits +} block_q5_K; +static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2 + QK_K/8, "wrong q5_K block size/padding"); + +typedef struct { + uint8_t ql[QK_K/2]; // quants, lower 4 bits + uint8_t qh[QK_K/4]; // quants, upper 2 bits + int8_t scales[QK_K/16]; // scales + half d; // delta +} block_q6_K; +static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding"); + +#define WARP_SIZE 32 + +#define CUDA_ADD_BLOCK_SIZE 256 +#define CUDA_MUL_BLOCK_SIZE 256 +#define CUDA_SILU_BLOCK_SIZE 256 +#define CUDA_CPY_BLOCK_SIZE 32 +#define CUDA_SCALE_BLOCK_SIZE 256 +#define CUDA_ROPE_BLOCK_SIZE 256 +#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32 +#define CUDA_DEQUANTIZE_BLOCK_SIZE 256 + +// dmmv = dequantize_mul_mat_vec +#ifndef GGML_CUDA_DMMV_X +#define GGML_CUDA_DMMV_X 32 +#endif +#ifndef GGML_CUDA_DMMV_Y +#define GGML_CUDA_DMMV_Y 1 +#endif + +#ifndef K_QUANTS_PER_ITERATION +#define K_QUANTS_PER_ITERATION 2 +#else +static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUANTS_PER_ITERATION must be 1 or 2"); +#endif + +static __global__ void add_f32(const float * x, const float * y, float * dst, const int k) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= k) { + return; + } + dst[i] = x[i] + y[i]; +} + +static __global__ void mul_f32(const float * x, const float * y, float * dst, const int kx, const int ky) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= kx) { + return; + } + dst[i] = x[i] * y[i%ky]; +} + +static __global__ void silu_f32(const float * x, float * dst, const int k) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= k) { + return; + } + dst[i] = x[i] / (1.0f + expf(-x[i])); +} + +static __global__ void rms_norm_f32(const float * x, float * dst, const int ncols) { + const int row = blockIdx.x*blockDim.y + threadIdx.y; + const int tid = threadIdx.x; + + const float eps = 1e-6; + + float tmp = 0.0f; // partial sum for thread in warp + + for (int i = 0; i < ncols; i += WARP_SIZE) { + const int col = i + tid; + const float xi = x[row*ncols + col]; + tmp += xi * xi; + } + + // sum up partial sums + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + const float mean = tmp / ncols; + const float scale = 1.0f / sqrtf(mean + eps); + + for (int i = 0; i < ncols; i += WARP_SIZE) { + const int col = i + tid; + dst[row*ncols + col] = scale * x[row*ncols + col]; + } +} + +static __device__ void dequantize_q4_0(const void * vx, const int ib, const int iqs, float & v0, float & v1){ + const block_q4_0 * x = (const block_q4_0 *) vx; + + const float d = x[ib].d; + + const uint8_t vui = x[ib].qs[iqs]; + + const int8_t vi0 = vui & 0xF; + const int8_t vi1 = vui >> 4; + + v0 = (vi0 - 8)*d; + v1 = (vi1 - 8)*d; +} + +static __device__ void dequantize_q4_1(const void * vx, const int ib, const int iqs, float & v0, float & v1){ + const block_q4_1 * x = (const block_q4_1 *) vx; + + const float d = x[ib].d; + const float m = x[ib].m; + + const uint8_t vui = x[ib].qs[iqs]; + + const int8_t vi0 = vui & 0xF; + const int8_t vi1 = vui >> 4; + + v0 = vi0*d + m; + v1 = vi1*d + m; +} + +static __device__ void dequantize_q5_0(const void * vx, const int ib, const int iqs, float & v0, float & v1){ + const block_q5_0 * x = (const block_q5_0 *) vx; + + const float d = x[ib].d; + + uint32_t qh; + memcpy(&qh, x[ib].qh, sizeof(qh)); + + const uint8_t xh_0 = ((qh >> (iqs + 0)) << 4) & 0x10; + const uint8_t xh_1 = ((qh >> (iqs + 12)) ) & 0x10; + + const int32_t x0 = ((x[ib].qs[iqs] & 0xf) | xh_0) - 16; + const int32_t x1 = ((x[ib].qs[iqs] >> 4) | xh_1) - 16; + + v0 = x0*d; + v1 = x1*d; +} + +static __device__ void dequantize_q5_1(const void * vx, const int ib, const int iqs, float & v0, float & v1){ + const block_q5_1 * x = (const block_q5_1 *) vx; + + const float d = x[ib].d; + const float m = x[ib].m; + + uint32_t qh; + memcpy(&qh, x[ib].qh, sizeof(qh)); + + const uint8_t xh_0 = ((qh >> (iqs + 0)) << 4) & 0x10; + const uint8_t xh_1 = ((qh >> (iqs + 12)) ) & 0x10; + + const int32_t x0 = ((x[ib].qs[iqs] & 0xf) | xh_0); + const int32_t x1 = ((x[ib].qs[iqs] >> 4) | xh_1); + + v0 = x0*d + m; + v1 = x1*d + m; +} + +static __device__ void dequantize_q8_0(const void * vx, const int ib, const int iqs, float & v0, float & v1){ + const block_q8_0 * x = (const block_q8_0 *) vx; + + const float d = x[ib].d; + + const int8_t vi0 = x[ib].qs[iqs + 0]; + const int8_t vi1 = x[ib].qs[iqs + 1]; + + v0 = vi0*d; + v1 = vi1*d; +} + +//================================== k-quants + +static __global__ void dequantize_block_q2_K(const void * vx, float * yy) { + + const int i = blockIdx.x; + const int tid = threadIdx.x; + const int n = tid/32; + const int l = tid - 32*n; + const int is = 8*n + l/16; + + const block_q2_K * x = (const block_q2_K *) vx; + + const uint8_t q = x[i].qs[32*n + l]; + float * y = yy + i*QK_K + 128*n; + + float dall = x[i].d; + float dmin = x[i].dmin; + y[l+ 0] = dall * (x[i].scales[is+0] & 0xF) * ((q >> 0) & 3) - dmin * (x[i].scales[is+0] >> 4); + y[l+32] = dall * (x[i].scales[is+2] & 0xF) * ((q >> 2) & 3) - dmin * (x[i].scales[is+2] >> 4); + y[l+64] = dall * (x[i].scales[is+4] & 0xF) * ((q >> 4) & 3) - dmin * (x[i].scales[is+4] >> 4); + y[l+96] = dall * (x[i].scales[is+6] & 0xF) * ((q >> 6) & 3) - dmin * (x[i].scales[is+6] >> 4); + +} + +static __global__ void dequantize_block_q3_K(const void * vx, float * yy) { + + int r = threadIdx.x/4; + int i = blockIdx.x; + int tid = r/2; + int is0 = r%2; + int l0 = 16*is0 + 4*(threadIdx.x%4); + int n = tid / 4; + int j = tid - 4*n; + + const block_q3_K * x = (const block_q3_K *) vx; + + uint8_t m = 1 << (4*n + j); + int is = 8*n + 2*j + is0; + int shift = 2*j; + + int8_t us = is < 4 ? (x[i].scales[is-0] & 0xF) | (((x[i].scales[is+8] >> 0) & 3) << 4) : + is < 8 ? (x[i].scales[is-0] & 0xF) | (((x[i].scales[is+4] >> 2) & 3) << 4) : + is < 12 ? (x[i].scales[is-8] >> 4) | (((x[i].scales[is+0] >> 4) & 3) << 4) : + (x[i].scales[is-8] >> 4) | (((x[i].scales[is-4] >> 6) & 3) << 4); + float d_all = x[i].d; + float dl = d_all * (us - 32); + + float * y = yy + i*QK_K + 128*n + 32*j; + const uint8_t * q = x[i].qs + 32*n; + const uint8_t * hm = x[i].hmask; + + for (int l = l0; l < l0+4; ++l) y[l] = dl * ((int8_t)((q[l] >> shift) & 3) - ((hm[l] & m) ? 0 : 4)); + +} + +static inline __device__ void get_scale_min_k4(int j, const uint8_t * q, uint8_t & d, uint8_t & m) { + if (j < 4) { + d = q[j] & 63; m = q[j + 4] & 63; + } else { + d = (q[j+4] & 0xF) | ((q[j-4] >> 6) << 4); + m = (q[j+4] >> 4) | ((q[j-0] >> 6) << 4); + } +} + +static __global__ void dequantize_block_q4_K(const void * vx, float * yy) { + const block_q4_K * x = (const block_q4_K *) vx; + + const int i = blockIdx.x; + + //// assume 64 threads - this is very slightly better than the one below + //const int tid = threadIdx.x; + //const int il = tid/16; + //const int ir = tid%16; + //const int is = 2*il; + //const int n = 2; + + // assume 32 threads + const int tid = threadIdx.x; + const int il = tid/8; + const int ir = tid%8; + const int is = 2*il; + const int n = 4; + + float * y = yy + i*QK_K + 64*il + n*ir; + + const float dall = x[i].d; + const float dmin = x[i].dmin; + + const uint8_t * q = x[i].qs + 32*il + n*ir; + + uint8_t sc, m; + get_scale_min_k4(is + 0, x[i].scales, sc, m); + const float d1 = dall * sc; const float m1 = dmin * m; + get_scale_min_k4(is + 1, x[i].scales, sc, m); + const float d2 = dall * sc; const float m2 = dmin * m; + for (int l = 0; l < n; ++l) { + y[l + 0] = d1 * (q[l] & 0xF) - m1; + y[l +32] = d2 * (q[l] >> 4) - m2; + } +} + +static __global__ void dequantize_block_q5_K(const void * vx, float * yy) { + const block_q5_K * x = (const block_q5_K *) vx; + + const int i = blockIdx.x; + + // assume 64 threads - this is very slightly better than the one below + const int tid = threadIdx.x; + const int il = tid/16; // il is in 0...3 + const int ir = tid%16; // ir is in 0...15 + const int is = 2*il; // is is in 0...6 + + float * y = yy + i*QK_K + 64*il + 2*ir; + + const float dall = x[i].d; + const float dmin = x[i].dmin; + + const uint8_t * ql = x[i].qs + 32*il + 2*ir; + const uint8_t * qh = x[i].qh + 2*ir; + + uint8_t sc, m; + get_scale_min_k4(is + 0, x[i].scales, sc, m); + const float d1 = dall * sc; const float m1 = dmin * m; + get_scale_min_k4(is + 1, x[i].scales, sc, m); + const float d2 = dall * sc; const float m2 = dmin * m; + + uint8_t hm = 1 << (2*il); + y[ 0] = d1 * ((ql[ 0] & 0xF) + (qh[ 0] & hm ? 16 : 0)) - m1; + y[ 1] = d1 * ((ql[ 1] & 0xF) + (qh[ 1] & hm ? 16 : 0)) - m1; + hm <<= 1; + y[32] = d2 * ((ql[ 0] >> 4) + (qh[ 0] & hm ? 16 : 0)) - m2; + y[33] = d2 * ((ql[ 1] >> 4) + (qh[ 1] & hm ? 16 : 0)) - m2; +} + +static __global__ void dequantize_block_q6_K(const void * vx, float * yy) { + const block_q6_K * x = (const block_q6_K *) vx; + + const int i = blockIdx.x; + + // assume 64 threads - this is very slightly better than the one below + const int tid = threadIdx.x; + const int ip = tid/32; // ip is 0 or 1 + const int il = tid - 32*ip; // 0...32 + const int is = 8*ip + il/16; + + float * y = yy + i*QK_K + 128*ip + il; + + const float d = x[i].d; + + const uint8_t * ql = x[i].ql + 64*ip + il; + const uint8_t qh = x[i].qh[32*ip + il]; + const int8_t * sc = x[i].scales + is; + + y[ 0] = d * sc[0] * ((int8_t)((ql[ 0] & 0xF) | (((qh >> 0) & 3) << 4)) - 32); + y[32] = d * sc[2] * ((int8_t)((ql[32] & 0xF) | (((qh >> 2) & 3) << 4)) - 32); + y[64] = d * sc[4] * ((int8_t)((ql[ 0] >> 4) | (((qh >> 4) & 3) << 4)) - 32); + y[96] = d * sc[6] * ((int8_t)((ql[32] >> 4) | (((qh >> 6) & 3) << 4)) - 32); +} + +static __global__ void dequantize_mul_mat_vec_q2_k(const void * vx, const float * yy, float * dst, const int ncols, int nrows) { + + static_assert(16%K_QUANTS_PER_ITERATION == 0, "16 must be divisible by K_QUANTS_PER_ITERATION"); + + const int row = blockIdx.y*blockDim.y + threadIdx.y; + if (row > nrows) return; + + const int num_blocks_per_row = ncols / QK_K; + const int ib0 = row*num_blocks_per_row; + + const block_q2_K * x = (const block_q2_K *)vx + ib0; + + const int tid = threadIdx.x/K_QUANTS_PER_ITERATION; // 0...31 + const int ix = threadIdx.x%K_QUANTS_PER_ITERATION; // 0 + + const int step = 16/K_QUANTS_PER_ITERATION; + + const int im = tid/step; // 0 or 1. 0 computes 0..., 1 computes 128... + const int in = tid - step*im; // 0...7 + + const int l0 = K_QUANTS_PER_ITERATION*in; // 0...14 in steps of 4 + const int q_offset = 32*im + l0; + const int s_offset = 8*im; + const int y_offset = 128*im + l0; + + float tmp = 0; // partial sum for thread in warp + + uint32_t aux[4]; + const uint8_t * d = (const uint8_t *)aux; + const uint8_t * m = (const uint8_t *)(aux + 2); + + for (int i = ix; i < num_blocks_per_row; i += K_QUANTS_PER_ITERATION) { + + const float * y = yy + i * QK_K + y_offset; + const uint8_t * q = x[i].qs + q_offset; + + const float dall = x[i].d; + const float dmin = x[i].dmin; + + const uint32_t * a = (const uint32_t *)(x[i].scales + s_offset); + aux[0] = a[0] & 0x0f0f0f0f; + aux[1] = a[1] & 0x0f0f0f0f; + aux[2] = (a[0] >> 4) & 0x0f0f0f0f; + aux[3] = (a[1] >> 4) & 0x0f0f0f0f; + + float sum1 = 0, sum2 = 0; + for (int l = 0; l < K_QUANTS_PER_ITERATION; ++l) { + sum1 += y[l+ 0] * d[0] * ((q[l+ 0] >> 0) & 3) + + y[l+32] * d[2] * ((q[l+ 0] >> 2) & 3) + + y[l+64] * d[4] * ((q[l+ 0] >> 4) & 3) + + y[l+96] * d[6] * ((q[l+ 0] >> 6) & 3) + + y[l+16] * d[1] * ((q[l+16] >> 0) & 3) + + y[l+48] * d[3] * ((q[l+16] >> 2) & 3) + + y[l+80] * d[5] * ((q[l+16] >> 4) & 3) + +y[l+112] * d[7] * ((q[l+16] >> 6) & 3); + sum2 += y[l+ 0] * m[0] + y[l+32] * m[2] + y[l+64] * m[4] + y[ l+96] * m[6] + + y[l+16] * m[1] + y[l+48] * m[3] + y[l+80] * m[5] + y[l+112] * m[7]; + + } + tmp += dall * sum1 - dmin * sum2; + + } + + // sum up partial sums and write back result + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + if (tid == 0) { + dst[row] = tmp; + } +} + +static __global__ void dequantize_mul_mat_vec_q3_k(const void * vx, const float * yy, float * dst, const int ncols) { + + const uint16_t kmask1 = 0x0303; + const uint16_t kmask2 = 0x0f0f; + + const int row = blockIdx.x; + const int num_blocks_per_row = ncols / QK_K; + const int ib0 = row*num_blocks_per_row; + + const block_q3_K * x = (const block_q3_K *)vx + ib0; + + const int tid = threadIdx.x/2; // 0...15 + const int ix = threadIdx.x%2; // 0, 1 + + const int n = 2; // iterations in the inner loop + const int im = tid/8; // 0 or 1. 0 computes 0..., 1 computes 128... + const int in = tid - 8*im; // 0...7 + + const uint8_t m = 1 << (4*im); + + const int l0 = n*in; // 0...28 in steps of 4 + const int q_offset = 32*im + l0; + const int y_offset = 128*im + l0; + + uint16_t utmp[4]; + const int8_t * s = (const int8_t *)utmp; + + const uint16_t s_shift = 4*im; + + float tmp = 0; // partial sum for thread in warp + + for (int i = ix; i < num_blocks_per_row; i += 2) { + + const float * y = yy + i * QK_K + y_offset; + const uint8_t * q = x[i].qs + q_offset; + const uint8_t * h = x[i].hmask + l0; + + const uint16_t * a = (const uint16_t *)x[i].scales; + utmp[0] = ((a[0] >> s_shift) & kmask2) | (((a[4] >> (s_shift + 0)) & kmask1) << 4); + utmp[1] = ((a[1] >> s_shift) & kmask2) | (((a[5] >> (s_shift + 0)) & kmask1) << 4); + utmp[2] = ((a[2] >> s_shift) & kmask2) | (((a[4] >> (s_shift + 2)) & kmask1) << 4); + utmp[3] = ((a[3] >> s_shift) & kmask2) | (((a[5] >> (s_shift + 2)) & kmask1) << 4); + + const float d = x[i].d; + + float sum = 0; + for (int l = 0; l < n; ++l) { + sum += y[l+ 0] * (s[0] - 32) * (((q[l] >> 0) & 3) - (h[l] & (m << 0) ? 0 : 4)) + + y[l+32] * (s[2] - 32) * (((q[l] >> 2) & 3) - (h[l] & (m << 1) ? 0 : 4)) + + y[l+64] * (s[4] - 32) * (((q[l] >> 4) & 3) - (h[l] & (m << 2) ? 0 : 4)) + + y[l+96] * (s[6] - 32) * (((q[l] >> 6) & 3) - (h[l] & (m << 3) ? 0 : 4)); + sum += y[l+16] * (s[1] - 32) * (((q[l+16] >> 0) & 3) - (h[l+16] & (m << 0) ? 0 : 4)) + + y[l+48] * (s[3] - 32) * (((q[l+16] >> 2) & 3) - (h[l+16] & (m << 1) ? 0 : 4)) + + y[l+80] * (s[5] - 32) * (((q[l+16] >> 4) & 3) - (h[l+16] & (m << 2) ? 0 : 4)) + + y[l+112] * (s[7] - 32) * (((q[l+16] >> 6) & 3) - (h[l+16] & (m << 3) ? 0 : 4)); + } + tmp += d * sum; + + } + + // sum up partial sums and write back result + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + if (tid == 0) { + dst[row] = tmp; + } +} + +static __global__ void dequantize_mul_mat_vec_q4_k(const void * vx, const float * yy, float * dst, const int ncols) { + + const uint16_t kmask1 = 0x3f3f; + const uint16_t kmask2 = 0x0f0f; + const uint16_t kmask3 = 0xc0c0; + + const int row = blockIdx.x; + const int num_blocks_per_row = ncols / QK_K; + const int ib0 = row*num_blocks_per_row; + + const int tid = threadIdx.x/2; // 0...15 + const int ix = threadIdx.x%2; + + const int il = tid/4; // 0...3 + const int ir = tid - 4*il;// 0...3 + const int n = 4; + + const int im = il/2; // 0 or 1. 0 computes 0,32 + 128,160, 1 computes 64,96 + 192,224 + const int in = il%2; + + const int l0 = n*(2*ir + in); + const int q_offset = 32*im + l0; + const int y_offset = 64*im + l0; + + uint16_t aux[4]; + const uint8_t * sc = (const uint8_t *)aux; + + const block_q4_K * x = (const block_q4_K *)vx + ib0; + + float tmp = 0; // partial sum for thread in warp + + for (int i = ix; i < num_blocks_per_row; i += 2) { + + const uint8_t * q1 = x[i].qs + q_offset; + const uint8_t * q2 = q1 + 64; + const float * y1 = yy + i*QK_K + y_offset; + const float * y2 = y1 + 128; + + const float dall = x[i].d; + const float dmin = x[i].dmin; + + const uint16_t * a = (const uint16_t *)x[i].scales; + aux[0] = a[im+0] & kmask1; + aux[1] = a[im+2] & kmask1; + aux[2] = ((a[im+4] >> 0) & kmask2) | ((a[im+0] & kmask3) >> 2); + aux[3] = ((a[im+4] >> 4) & kmask2) | ((a[im+2] & kmask3) >> 2); + + float4 s = {0.f, 0.f, 0.f, 0.f}; + float smin = 0; + for (int l = 0; l < n; ++l) { + s.x += y1[l] * (q1[l] & 0xF); s.y += y1[l+32] * (q1[l] >> 4); + s.z += y2[l] * (q2[l] & 0xF); s.w += y2[l+32] * (q2[l] >> 4); + smin += y1[l] * sc[2] + y1[l+32] * sc[3] + y2[l] * sc[6] + y2[l+32] * sc[7]; + } + tmp += dall * (s.x * sc[0] + s.y * sc[1] + s.z * sc[4] + s.w * sc[5]) - dmin * smin; + + } + + // sum up partial sums and write back result + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + if (tid == 0) { + dst[row] = tmp; + } +} + +static __global__ void dequantize_mul_mat_vec_q5_k(const void * vx, const float * yy, float * dst, const int ncols) { + + const uint16_t kmask1 = 0x3f3f; + const uint16_t kmask2 = 0x0f0f; + const uint16_t kmask3 = 0xc0c0; + + //const int row = blockIdx.x*blockDim.y + threadIdx.y; + const int row = blockIdx.x; + const int num_blocks_per_row = ncols / QK_K; + const int ib0 = row*num_blocks_per_row; + + const int tid = threadIdx.x/2; // 0...15 + const int ix = threadIdx.x%2; + + const int il = tid/4; // 0...3 + const int ir = tid - 4*il;// 0...3 + const int n = 4; + + const int im = il/2; // 0 or 1. 0 computes 0,32 + 128,160, 1 computes 64,96 + 192,224 + const int in = il%2; + + const int l0 = n*(2*ir + in); + const int q_offset = 32*im + l0; + const int y_offset = 64*im + l0; + + const uint8_t hm1 = 1 << (2*im); + const uint8_t hm2 = hm1 << 4; + + uint16_t aux[4]; + const uint8_t * sc = (const uint8_t *)aux; + + const block_q5_K * x = (const block_q5_K *)vx + ib0; + + float tmp = 0; // partial sum for thread in warp + + for (int i = ix; i < num_blocks_per_row; i += 2) { + + const uint8_t * ql1 = x[i].qs + q_offset; + const uint8_t * ql2 = ql1 + 64; + const uint8_t * qh = x[i].qh + l0; + const float * y1 = yy + i*QK_K + y_offset; + const float * y2 = y1 + 128; + + const float dall = x[i].d; + const float dmin = x[i].dmin; + + const uint16_t * a = (const uint16_t *)x[i].scales; + aux[0] = a[im+0] & kmask1; + aux[1] = a[im+2] & kmask1; + aux[2] = ((a[im+4] >> 0) & kmask2) | ((a[im+0] & kmask3) >> 2); + aux[3] = ((a[im+4] >> 4) & kmask2) | ((a[im+2] & kmask3) >> 2); + + float4 sum = {0.f, 0.f, 0.f, 0.f}; + float smin = 0; + for (int l = 0; l < n; ++l) { + sum.x += y1[l+ 0] * ((ql1[l] & 0xF) + (qh[l] & (hm1 << 0) ? 16 : 0)); + sum.y += y1[l+32] * ((ql1[l] >> 4) + (qh[l] & (hm1 << 1) ? 16 : 0)); + sum.z += y2[l+ 0] * ((ql2[l] & 0xF) + (qh[l] & (hm2 << 0) ? 16 : 0)); + sum.w += y2[l+32] * ((ql2[l] >> 4) + (qh[l] & (hm2 << 1) ? 16 : 0)); + smin += y1[l] * sc[2] + y1[l+32] * sc[3] + y2[l] * sc[6] + y2[l+32] * sc[7]; + } + tmp += dall * (sum.x * sc[0] + sum.y * sc[1] + sum.z * sc[4] + sum.w * sc[5]) - dmin * smin; + + } + + // sum up partial sums and write back result + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + if (tid == 0) { + dst[row] = tmp; + } +} + +static __global__ void dequantize_mul_mat_vec_q6_k(const void * vx, const float * yy, float * dst, const int ncols, int nrows) { + + static_assert(16%K_QUANTS_PER_ITERATION == 0, "16 must be divisible by K_QUANTS_PER_ITERATION"); + + const int row = blockIdx.y*blockDim.y + threadIdx.y; + if (row > nrows) return; + + const int num_blocks_per_row = ncols / QK_K; + const int ib0 = row*num_blocks_per_row; + + const block_q6_K * x = (const block_q6_K *)vx + ib0; + + const int tid = threadIdx.x/K_QUANTS_PER_ITERATION; // 0...31 or 0...16 + const int ix = threadIdx.x%K_QUANTS_PER_ITERATION; // 0 or 0, 1 + + const int step = 16/K_QUANTS_PER_ITERATION; // 16 or 8 + + const int im = tid/step; // 0 or 1. 0 computes 0..., 1 computes 128... + const int in = tid - step*im; // 0...15 or 0...7 + +#if K_QUANTS_PER_ITERATION == 1 + const int l0 = K_QUANTS_PER_ITERATION*in; // 0...15 + const int is = 0; +#else + const int l0 = 4 * in; // 0, 4, 8, ..., 28 + const int is = in / 4; +#endif + const int ql_offset = 64*im + l0; + const int qh_offset = 32*im + l0; + const int s_offset = 8*im + is; + const int y_offset = 128*im + l0; + + float tmp = 0; // partial sum for thread in warp + + for (int i = ix; i < num_blocks_per_row; i += K_QUANTS_PER_ITERATION) { + + const float * y = yy + i * QK_K + y_offset; + const uint8_t * ql = x[i].ql + ql_offset; + const uint8_t * qh = x[i].qh + qh_offset; + const int8_t * s = x[i].scales + s_offset; + + const float d = x[i].d; + +#if K_QUANTS_PER_ITERATION == 1 + float sum = y[ 0] * s[0] * d * ((int8_t)((ql[ 0] & 0xF) | ((qh[ 0] & 0x03) << 4)) - 32) + + y[16] * s[1] * d * ((int8_t)((ql[16] & 0xF) | ((qh[16] & 0x03) << 4)) - 32) + + y[32] * s[2] * d * ((int8_t)((ql[32] & 0xF) | ((qh[ 0] & 0x0c) << 2)) - 32) + + y[48] * s[3] * d * ((int8_t)((ql[48] & 0xF) | ((qh[16] & 0x0c) << 2)) - 32) + + y[64] * s[4] * d * ((int8_t)((ql[ 0] >> 4) | ((qh[ 0] & 0x30) >> 0)) - 32) + + y[80] * s[5] * d * ((int8_t)((ql[16] >> 4) | ((qh[16] & 0x30) >> 0)) - 32) + + y[96] * s[6] * d * ((int8_t)((ql[32] >> 4) | ((qh[ 0] & 0xc0) >> 2)) - 32) + +y[112] * s[7] * d * ((int8_t)((ql[48] >> 4) | ((qh[16] & 0xc0) >> 2)) - 32); + tmp += sum; +#else + float sum = 0; + for (int l = 0; l < 4; ++l) { + sum += y[l+ 0] * s[0] * d * ((int8_t)((ql[l+ 0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32) + + y[l+32] * s[2] * d * ((int8_t)((ql[l+32] & 0xF) | (((qh[l] >> 2) & 3) << 4)) - 32) + + y[l+64] * s[4] * d * ((int8_t)((ql[l+ 0] >> 4) | (((qh[l] >> 4) & 3) << 4)) - 32) + + y[l+96] * s[6] * d * ((int8_t)((ql[l+32] >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32); + } + tmp += sum; +#endif + + } + + // sum up partial sums and write back result + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + if (tid == 0) { + dst[row] = tmp; + } +} + +static __device__ void convert_f16(const void * vx, const int ib, const int iqs, float & v0, float & v1){ + const half * x = (const half *) vx; + + v0 = __half2float(x[ib + iqs + 0]); + v1 = __half2float(x[ib + iqs + 1]); +} + +template +static __global__ void dequantize_block(const void * vx, float * y, const int k) { + const int i = blockDim.x*blockIdx.x + 2*threadIdx.x; + + if (i >= k) { + return; + } + + const int ib = i/qk; // block index + const int iqs = (i%qk)/qr; // quant index + const int iybs = i - i%qk; // y block start index + const int y_offset = qr == 1 ? 1 : qk/2; + + // dequantize + float & v0 = y[iybs + iqs + 0]; + float & v1 = y[iybs + iqs + y_offset]; + dequantize_kernel(vx, ib, iqs, v0, v1); +} + +template +static __global__ void dequantize_mul_mat_vec(const void * vx, const float * y, float * dst, const int ncols, const int nrows) { + // qk = quantized weights per x block + // qr = number of quantized weights per data value in x block + const int row = blockIdx.y*blockDim.y + threadIdx.y; + + if (row >= nrows) { + return; + } + + const int tid = threadIdx.x; + + const int iter_stride = 2*GGML_CUDA_DMMV_X; + const int vals_per_iter = iter_stride / WARP_SIZE; // num quantized vals per thread and i iter + const int y_offset = qr == 1 ? 1 : qk/2; + + float tmp = 0.0f; // partial sum for thread in warp + + for (int i = 0; i < ncols; i += iter_stride) { + const int col = i + vals_per_iter*tid; + const int ib = (row*ncols + col)/qk; // x block index + const int iqs = (col%qk)/qr; // x quant index + const int iybs = col - col%qk; // y block start index + +// processing >2 values per i iter is faster for fast GPUs +#pragma unroll + for (int j = 0; j < vals_per_iter; j += 2) { + // process 2 vals per j iter + + // dequantize + float v0, v1; + dequantize_kernel(vx, ib, iqs + j/qr, v0, v1); + // for qr = 2 the iqs needs to increase by 1 per j iter because 2 weights per data val + + // matrix multiplication + tmp += v0 * y[iybs + iqs + j/qr + 0]; + tmp += v1 * y[iybs + iqs + j/qr + y_offset]; + // for qr = 2 the y index needs to increase by 1 per j iter because of y_offset = qk/2 + } + } + + // sum up partial sums and write back result + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + if (tid == 0) { + dst[row] = tmp; + } +} + +static __global__ void mul_mat_p021_f16_f32(const void * vx, const float * y, float * dst, const int ncols_x, const int nrows_x, const int nchannels_x) { + const half * x = (half *) vx; + + const int row_x = blockDim.y*blockIdx.y + threadIdx.y; + const int channel = blockDim.z*blockIdx.z + threadIdx.z; + + const int nrows_y = ncols_x; + const int nrows_dst = nrows_x; + const int row_dst = row_x; + + float tmp = 0.0f; + + for (int col_x0 = 0; col_x0 < ncols_x; col_x0 += blockDim.x) { + const int col_x = col_x0 + threadIdx.x; + + if (col_x >= ncols_x) { + break; + } + + // x is transposed and permuted + const int ix = row_x*nchannels_x*ncols_x + channel*ncols_x + col_x; + const float xi = __half2float(x[ix]); + + const int row_y = col_x; + + + // y is not transposed but permuted + const int iy = channel*nrows_y + row_y; + + tmp += xi * y[iy]; + } + + // dst is not transposed and not permuted + const int idst = channel*nrows_dst + row_dst; + + // sum up partial sums and write back result + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + if (threadIdx.x == 0) { + dst[idst] = tmp; + } +} + +static __global__ void mul_mat_vec_nc_f16_f32( // nc == non-contiguous + const void * vx, const float * y, float * dst, const int ncols_x, const int nrows_x, + const int row_stride_x, const int nchannels_x, const int channel_stride_x) { + + const half * x = (half *) vx; + + const int row_x = blockDim.y*blockIdx.y + threadIdx.y; + const int channel = blockDim.z*blockIdx.z + threadIdx.z; + + const int nrows_y = ncols_x; + const int nrows_dst = nrows_x; + const int row_dst = row_x; + + const int idst = channel*nrows_dst + row_dst; + + float tmp = 0.0f; + + for (int col_x0 = 0; col_x0 < ncols_x; col_x0 += blockDim.x) { + const int col_x = col_x0 + threadIdx.x; + + if (col_x >= ncols_x) { + break; + } + + const int ix = channel*channel_stride_x + row_x*row_stride_x + col_x; + const float xi = __half2float(x[ix]); + + const int row_y = col_x; + + const int iy = channel*nrows_y + row_y; + + tmp += xi * y[iy]; + } + + // sum up partial sums and write back result + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + if (threadIdx.x == 0) { + dst[idst] = tmp; + } +} + +static __device__ void cpy_1_f32_f32(const char * cxi, char * cdsti) { + const float * xi = (float *) cxi; + float * dsti = (float *) cdsti; + + *dsti = *xi; +} + +static __device__ void cpy_1_f32_f16(const char * cxi, char * cdsti) { + const float * xi = (float *) cxi; + half * dsti = (half *) cdsti; + + *dsti = __float2half(*xi); +} + +template +static __global__ void cpy_f32_f16(const char * cx, char * cdst, const int ne, + const int ne00, const int ne01, const int nb00, const int nb01, const int nb02, + const int ne10, const int ne11, const int nb10, const int nb11, const int nb12) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= ne) { + return; + } + + // determine indices i02/i12, i01/i11, i00/i10 as a function of index i of flattened tensor + // then combine those indices with the corresponding byte offsets to get the total offsets + const int i02 = i / (ne00*ne01); + const int i01 = (i - i02*ne01*ne00) / ne00; + const int i00 = i - i02*ne01*ne00 - i01*ne00; + const int x_offset = i00*nb00 + i01*nb01 + i02*nb02; + + const int i12 = i / (ne10*ne11); + const int i11 = (i - i12*ne10*ne11) / ne10; + const int i10 = i - i12*ne10*ne11 - i11*ne10; + const int dst_offset = i10*nb10 + i11*nb11 + i12*nb12; + + cpy_1(cx + x_offset, cdst + dst_offset); +} + +// rope == RoPE == rotary positional embedding +static __global__ void rope_f32(const float * x, float * dst, const int ncols, const float p, const float theta_scale) { + const int col = 2*(blockDim.x*blockIdx.x + threadIdx.x); + + if (col >= ncols) { + return; + } + + const int row = blockDim.y*blockIdx.y + threadIdx.y; + const int i = row*ncols + col; + + const float theta = p*powf(theta_scale, col/2); + const float sin_theta = sinf(theta); + const float cos_theta = cosf(theta); + + const float x0 = x[i + 0]; + const float x1 = x[i + 1]; + + dst[i + 0] = x0*cos_theta - x1*sin_theta; + dst[i + 1] = x0*sin_theta + x1*cos_theta; +} + +static __global__ void diag_mask_inf_f32(const float * x, float * dst, const int ncols, const int rows_per_channel, const int n_past) { + const int col = blockDim.x*blockIdx.x + threadIdx.x; + const int row = blockDim.y*blockIdx.y + threadIdx.y; + + if (col >= ncols) { + return; + } + + const int i = row*ncols + col; + // dst[i] = col > n_past + row ? -INFINITY : x[i]; + dst[i] = x[i] - (col > n_past + row % rows_per_channel) * INT_MAX; // equivalent within rounding error but slightly faster on GPU +} + +// the CUDA soft max implementation differs from the CPU implementation +// instead of doubles floats are used +// values are also not normalized to the maximum value by subtracting it in the exponential function +// theoretically these changes could cause problems with rounding error and arithmetic overflow but for LLaMa it seems to be fine +static __global__ void soft_max_f32(const float * x, float * dst, const int ncols) { + const int row = blockDim.y*blockIdx.y + threadIdx.y; + const int block_size = blockDim.x; + const int tid = threadIdx.x; + + float tmp = 0.0; + + for (int block_start = 0; block_start < ncols; block_start += block_size) { + const int col = block_start + tid; + + if (col >= ncols) { + break; + } + + const int i = row*ncols + col; + const float val = expf(x[i]); + tmp += val; + dst[i] = val; + } + + // sum up partial sums + __syncthreads(); +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32); + } + + for (int block_start = 0; block_start < ncols; block_start += block_size) { + const int col = block_start + tid; + + if (col >= ncols) { + break; + } + + const int i = row*ncols + col; + dst[i] /= tmp; + } +} + +static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= k) { + return; + } + + dst[i] = scale * x[i]; +} + +static void add_f32_cuda(const float * x, const float * y, float * dst, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_ADD_BLOCK_SIZE - 1) / CUDA_ADD_BLOCK_SIZE; + add_f32<<>>(x, y, dst, k); +} + +static void mul_f32_cuda(const float * x, const float * y, float * dst, const int kx, const int ky, cudaStream_t stream) { + const int num_blocks = (kx + CUDA_MUL_BLOCK_SIZE - 1) / CUDA_MUL_BLOCK_SIZE; + mul_f32<<>>(x, y, dst, kx, ky); +} + +static void silu_f32_cuda(const float * x, float * dst, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_SILU_BLOCK_SIZE - 1) / CUDA_SILU_BLOCK_SIZE; + silu_f32<<>>(x, dst, k); +} + +static void rms_norm_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % WARP_SIZE == 0); + const dim3 block_dims(WARP_SIZE, 1, 1); + rms_norm_f32<<>>(x, dst, ncols); +} + +static void dequantize_row_q4_0_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE; + dequantize_block<<>>(vx, y, k); +} + +static void dequantize_row_q4_1_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE; + dequantize_block<<>>(vx, y, k); +} + +static void dequantize_row_q5_0_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE; + dequantize_block<<>>(vx, y, k); +} + +static void dequantize_row_q5_1_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE; + dequantize_block<<>>(vx, y, k); +} + +static void dequantize_row_q8_0_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE; + dequantize_block<<>>(vx, y, k); +} + +static void dequantize_row_q2_K_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int nb = k / QK_K; + dequantize_block_q2_K<<>>(vx, y); +} + +static void dequantize_row_q3_K_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int nb = k / QK_K; + dequantize_block_q3_K<<>>(vx, y); +} + +static void dequantize_row_q4_K_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int nb = k / QK_K; + dequantize_block_q4_K<<>>(vx, y); +} + +static void dequantize_row_q5_K_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int nb = k / QK_K; + dequantize_block_q5_K<<>>(vx, y); +} + +static void dequantize_row_q6_K_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int nb = k / QK_K; + dequantize_block_q6_K<<>>(vx, y); +} + +static void dequantize_mul_mat_vec_q4_0_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % GGML_CUDA_DMMV_X == 0); + const int block_num_y = (nrows + GGML_CUDA_DMMV_Y - 1) / GGML_CUDA_DMMV_Y; + const dim3 block_nums(1, block_num_y, 1); + const dim3 block_dims(WARP_SIZE, GGML_CUDA_DMMV_Y, 1); + dequantize_mul_mat_vec + <<>>(vx, y, dst, ncols, nrows); +} + +static void dequantize_mul_mat_vec_q4_1_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % GGML_CUDA_DMMV_X == 0); + const int block_num_y = (nrows + GGML_CUDA_DMMV_Y - 1) / GGML_CUDA_DMMV_Y; + const dim3 block_nums(1, block_num_y, 1); + const dim3 block_dims(WARP_SIZE, GGML_CUDA_DMMV_Y, 1); + dequantize_mul_mat_vec + <<>>(vx, y, dst, ncols, nrows); +} + +static void dequantize_mul_mat_vec_q5_0_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % GGML_CUDA_DMMV_X == 0); + const int block_num_y = (nrows + GGML_CUDA_DMMV_Y - 1) / GGML_CUDA_DMMV_Y; + const dim3 block_nums(1, block_num_y, 1); + const dim3 block_dims(WARP_SIZE, GGML_CUDA_DMMV_Y, 1); + dequantize_mul_mat_vec + <<>>(vx, y, dst, ncols, nrows); +} + +static void dequantize_mul_mat_vec_q5_1_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % GGML_CUDA_DMMV_X == 0); + const int block_num_y = (nrows + GGML_CUDA_DMMV_Y - 1) / GGML_CUDA_DMMV_Y; + const dim3 block_nums(1, block_num_y, 1); + const dim3 block_dims(WARP_SIZE, GGML_CUDA_DMMV_Y, 1); + dequantize_mul_mat_vec + <<>>(vx, y, dst, ncols, nrows); +} + +static void dequantize_mul_mat_vec_q8_0_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % GGML_CUDA_DMMV_X == 0); + const int block_num_y = (nrows + GGML_CUDA_DMMV_Y - 1) / GGML_CUDA_DMMV_Y; + const dim3 block_nums(1, block_num_y, 1); + const dim3 block_dims(WARP_SIZE, GGML_CUDA_DMMV_Y, 1); + dequantize_mul_mat_vec + <<>>(vx, y, dst, ncols, nrows); +} + +static void dequantize_mul_mat_vec_q2_K_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % QK_K == 0); + const int ny = 2; + const int block_num_y = (nrows + ny - 1) / ny; + const dim3 block_nums(1, block_num_y, 1); + const dim3 block_dims(32, ny, 1); + dequantize_mul_mat_vec_q2_k<<>>(vx, y, dst, ncols, nrows); +} + +static void dequantize_mul_mat_vec_q3_K_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % QK_K == 0); + const dim3 block_dims(32, 1, 1); + dequantize_mul_mat_vec_q3_k<<>>(vx, y, dst, ncols); +} + +static void dequantize_mul_mat_vec_q4_K_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % QK_K == 0); + const dim3 block_dims(32, 1, 1); + dequantize_mul_mat_vec_q4_k<<>>(vx, y, dst, ncols); +} + +static void dequantize_mul_mat_vec_q5_K_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % QK_K == 0); + const dim3 block_dims(32, 1, 1); + dequantize_mul_mat_vec_q5_k<<>>(vx, y, dst, ncols); +} + +static void dequantize_mul_mat_vec_q6_K_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % QK_K == 0); + const int ny = 2 / K_QUANTS_PER_ITERATION; + const int block_num_y = (nrows + ny - 1) / ny; + const dim3 block_nums(1, block_num_y, 1); + const dim3 block_dims(32, ny, 1); + dequantize_mul_mat_vec_q6_k<<>>(vx, y, dst, ncols, nrows); +} + +static void convert_fp16_to_fp32_cuda(const void * vx, float * y, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_DEQUANTIZE_BLOCK_SIZE - 1) / CUDA_DEQUANTIZE_BLOCK_SIZE; + dequantize_block<1, 1, convert_f16><<>>(vx, y, k); +} + +static void convert_mul_mat_vec_f16_cuda(const void * vx, const float * y, float * dst, const int ncols, const int nrows, cudaStream_t stream) { + GGML_ASSERT(ncols % GGML_CUDA_DMMV_X == 0); + const int block_num_y = (nrows + GGML_CUDA_DMMV_Y - 1) / GGML_CUDA_DMMV_Y; + const dim3 block_nums(1, block_num_y, 1); + const dim3 block_dims(WARP_SIZE, GGML_CUDA_DMMV_Y, 1); + dequantize_mul_mat_vec<1, 1, convert_f16> + <<>>(vx, y, dst, ncols, nrows); +} + +static to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) { + switch (type) { + case GGML_TYPE_Q4_0: + return dequantize_row_q4_0_cuda; + case GGML_TYPE_Q4_1: + return dequantize_row_q4_1_cuda; + case GGML_TYPE_Q5_0: + return dequantize_row_q5_0_cuda; + case GGML_TYPE_Q5_1: + return dequantize_row_q5_1_cuda; + case GGML_TYPE_Q8_0: + return dequantize_row_q8_0_cuda; + case GGML_TYPE_Q2_K: + return dequantize_row_q2_K_cuda; + case GGML_TYPE_Q3_K: + return dequantize_row_q3_K_cuda; + case GGML_TYPE_Q4_K: + return dequantize_row_q4_K_cuda; + case GGML_TYPE_Q5_K: + return dequantize_row_q5_K_cuda; + case GGML_TYPE_Q6_K: + return dequantize_row_q6_K_cuda; + case GGML_TYPE_F16: + return convert_fp16_to_fp32_cuda; + default: + return nullptr; + } +} + +static void ggml_mul_mat_p021_f16_f32_cuda(const void * vx, const float * y, float * dst, const int ncols_x, const int nrows_x, const int nchannels_x, cudaStream_t stream) { + const dim3 block_nums(1, nrows_x, nchannels_x); + const dim3 block_dims(WARP_SIZE, 1, 1); + mul_mat_p021_f16_f32<<>>(vx, y, dst, ncols_x, nrows_x, nchannels_x); +} + +static void ggml_mul_mat_vec_nc_f16_f32_cuda( + const void * vx, const float * y, float * dst, const int ncols_x, const int nrows_x, const int row_stride_x, + const int nchannels_x, const int channel_stride_x, cudaStream_t stream) { + + const dim3 block_nums(1, nrows_x, nchannels_x); + const dim3 block_dims(WARP_SIZE, 1, 1); + mul_mat_vec_nc_f16_f32<<>> + (vx, y, dst, ncols_x, nrows_x, row_stride_x, nchannels_x, channel_stride_x); +} + +static void ggml_cpy_f32_f32_cuda( + const char * cx, char * cdst, const int ne, + const int ne00, const int ne01, const int nb00, const int nb01, const int nb02, + const int ne10, const int ne11, const int nb10, const int nb11, const int nb12, cudaStream_t stream) { + + const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE; + cpy_f32_f16<<>> + (cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12); +} + +static void ggml_cpy_f32_f16_cuda( + const char * cx, char * cdst, const int ne, + const int ne00, const int ne01, const int nb00, const int nb01, const int nb02, + const int ne10, const int ne11, const int nb10, const int nb11, const int nb12, cudaStream_t stream) { + + const int num_blocks = (ne + CUDA_CPY_BLOCK_SIZE - 1) / CUDA_CPY_BLOCK_SIZE; + cpy_f32_f16<<>> + (cx, cdst, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12); +} + +static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE; + scale_f32<<>>(x, dst, scale, k); +} + +static void rope_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, const float p, const float theta_scale, cudaStream_t stream) { + GGML_ASSERT(nrows % 2 == 0); + const dim3 block_dims(2*CUDA_ROPE_BLOCK_SIZE, 1, 1); + const int num_blocks_x = (ncols + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE); + const dim3 block_nums(num_blocks_x, nrows, 1); + rope_f32<<>>(x, dst, ncols, p, theta_scale); +} + +static void diag_mask_inf_f32_cuda(const float * x, float * dst, const int ncols_x, const int nrows_x, const int rows_per_channel, const int n_past, cudaStream_t stream) { + const dim3 block_dims(CUDA_DIAG_MASK_INF_BLOCK_SIZE, 1, 1); + const int block_num_x = (ncols_x + CUDA_DIAG_MASK_INF_BLOCK_SIZE - 1) / CUDA_DIAG_MASK_INF_BLOCK_SIZE; + const dim3 block_nums(block_num_x, nrows_x, 1); + diag_mask_inf_f32<<>>(x, dst, ncols_x, rows_per_channel, n_past); +} + +static void soft_max_f32_cuda(const float * x, float * dst, const int ncols_x, const int nrows_x, cudaStream_t stream) { + const dim3 block_dims(WARP_SIZE, 1, 1); + const dim3 block_nums(1, nrows_x, 1); + soft_max_f32<<>>(x, dst, ncols_x); +} + +// buffer pool for cuda +#define MAX_CUDA_BUFFERS 256 + +struct scoped_spin_lock { + std::atomic_flag& lock; + scoped_spin_lock(std::atomic_flag& lock) : lock(lock) { + while (lock.test_and_set(std::memory_order_acquire)) { + ; // spin + } + } + ~scoped_spin_lock() { + lock.clear(std::memory_order_release); + } + scoped_spin_lock(const scoped_spin_lock&) = delete; + scoped_spin_lock& operator=(const scoped_spin_lock&) = delete; +}; + +struct cuda_buffer { + void * ptr = nullptr; + size_t size = 0; +}; + +static cuda_buffer g_cuda_buffer_pool[GGML_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS]; +static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT; + +static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) { + scoped_spin_lock lock(g_cuda_pool_lock); + int id; + CUDA_CHECK(cudaGetDevice(&id)); + + for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) { + cuda_buffer& b = g_cuda_buffer_pool[id][i]; + if (b.size >= size && b.ptr != nullptr) { + void * ptr = b.ptr; + *actual_size = b.size; + b.ptr = nullptr; + b.size = 0; + return ptr; + } + } + void * ptr; + CUDA_CHECK(cudaMalloc((void **) &ptr, size)); + *actual_size = size; + return ptr; +} + +static void ggml_cuda_pool_free(void * ptr, size_t size) { + scoped_spin_lock lock(g_cuda_pool_lock); + int id; + CUDA_CHECK(cudaGetDevice(&id)); + + for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) { + cuda_buffer& b = g_cuda_buffer_pool[id][i]; + if (b.ptr == nullptr) { + b.ptr = ptr; + b.size = size; + return; + } + } + fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS\n"); + CUDA_CHECK(cudaFree(ptr)); +} + + +static void * g_scratch_buffer = nullptr; +static size_t g_scratch_size = 1024*1024*1024; // 1 GB by default +static size_t g_scratch_offset = 0; + +static int g_device_count = -1; +static int g_main_device = 0; +static float g_tensor_split[GGML_CUDA_MAX_DEVICES] = {0}; + +static cublasHandle_t g_cublas_handles[GGML_CUDA_MAX_DEVICES] = {nullptr}; + +static cudaStream_t g_cudaStreams_main[GGML_CUDA_MAX_DEVICES] = { nullptr }; + +void ggml_init_cublas() { + static bool initialized = false; + + if (!initialized) { + CUDA_CHECK(cudaGetDeviceCount(&g_device_count)); + GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES); + int64_t total_vram = 0; + for (int id = 0; id < g_device_count; ++id) { + cudaDeviceProp prop; + CUDA_CHECK(cudaGetDeviceProperties(&prop, id)); + g_tensor_split[id] = total_vram; + total_vram += prop.totalGlobalMem; + } + for (int id = 0; id < g_device_count; ++id) { + g_tensor_split[id] /= total_vram; + } + + for (int id = 0; id < g_device_count; ++id) { + CUDA_CHECK(cudaSetDevice(id)); + + // create main stream + CUDA_CHECK(cudaStreamCreateWithFlags(&g_cudaStreams_main[id], cudaStreamNonBlocking)); + + // create cublas handle + CUBLAS_CHECK(cublasCreate(&g_cublas_handles[id])); + CUBLAS_CHECK(cublasSetMathMode(g_cublas_handles[id], CUBLAS_TF32_TENSOR_OP_MATH)); + } + + // configure logging to stdout + // CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr)); + + initialized = true; + } +} + +void ggml_cuda_set_tensor_split(const float * tensor_split) { + bool all_zero = true; + for (int i = 0; i < g_device_count; ++i) { + if (tensor_split[i] != 0.0f) { + all_zero = false; + break; + } + } + if (all_zero) { + return; + } + float split_sum = 0.0f; + for (int i = 0; i < g_device_count; ++i) { + g_tensor_split[i] = split_sum; + split_sum += tensor_split[i]; + } + for (int i = 0; i < g_device_count; ++i) { + g_tensor_split[i] /= split_sum; + } +} + +void * ggml_cuda_host_malloc(size_t size) { + if (getenv("GGML_CUDA_NO_PINNED") != nullptr) { + return nullptr; + } + + void * ptr = nullptr; + cudaError_t err = cudaMallocHost((void **) &ptr, size); + if (err != cudaSuccess) { + // The allocation error can be bypassed. A null ptr will assigned out of this function. + // This can fixed the OOM error in WSL. + cudaGetLastError(); + fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory: %s\n", + size/1024.0/1024.0, cudaGetErrorString(err)); + return nullptr; + } + + return ptr; +} + +void ggml_cuda_host_free(void * ptr) { + CUDA_CHECK(cudaFreeHost(ptr)); +} + +static cudaError_t ggml_cuda_cpy_tensor_2d( + void * dst, const struct ggml_tensor * src, int64_t i3, int64_t i2, int64_t i1_low, int64_t i1_high, cudaStream_t stream) { + + cudaMemcpyKind kind; + char * src_ptr; + if (src->backend == GGML_BACKEND_CPU) { + kind = cudaMemcpyHostToDevice; + src_ptr = (char *) src->data; + } else if (src->backend == GGML_BACKEND_GPU) { + kind = cudaMemcpyDeviceToDevice; + struct ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra; + int id; + CUDA_CHECK(cudaGetDevice(&id)); + src_ptr = (char *) extra->data_device[id]; + } else { + GGML_ASSERT(false); + } + char * dst_ptr = (char *) dst; + + const int64_t ne0 = src->ne[0]; + const int64_t nb0 = src->nb[0]; + const int64_t nb1 = src->nb[1]; + const int64_t nb2 = src->nb[2]; + const int64_t nb3 = src->nb[3]; + const enum ggml_type type = src->type; + const int64_t ts = ggml_type_size(type); + const int64_t bs = ggml_blck_size(type); + int64_t i1_diff = i1_high - i1_low; + + const char * x = src_ptr + i1_low*nb1 + i2*nb2 + i3*nb3; + if (nb0 == ts && nb1 == ts*ne0/bs) { + return cudaMemcpyAsync(dst_ptr, x, i1_diff*nb1, kind, stream); + } else if (nb0 == ts) { + return cudaMemcpy2DAsync(dst_ptr, ts*ne0/bs, x, nb1, ts*ne0/bs, i1_diff, kind, stream); + } else { + for (int64_t i1 = 0; i1 < i1_diff; i1++) { + const void * rx = (const void *) ((const char *) x + i1*nb1); + void * rd = (void *) (dst_ptr + i1*ts*ne0/bs); + // pretend the row is a matrix with cols=1 + cudaError_t r = cudaMemcpy2DAsync(rd, ts/bs, rx, nb0, ts/bs, ne0, kind, stream); + if (r != cudaSuccess) return r; + } + return cudaSuccess; + } +} + +inline void ggml_cuda_op_add( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(src1_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const int64_t ne0 = src0->ne[0]; + const int64_t i01_diff = i01_high - i01_low; + + // compute + add_f32_cuda(src0_ddf_i, src1_ddf_i, dst_ddf_i, ne0*i01_diff, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + + (void) src1; + (void) dst; + (void) src0_ddq_i; + (void) i02; + (void) i1; +} + +inline void ggml_cuda_op_mul( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(src1_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const int64_t ne00 = src0->ne[0]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + + for (int64_t i01 = i01_low; i01 < i01_high; i01++) { + const int64_t i11 = i1*ne11 + i01%ne11; // broadcast src1 across src0 + + float * src0_ddf_i01 = src0_ddf_i + i01*ne00; + float * src1_ddf_i01 = src1_ddf_i + i11*ne10; + float * dst_ddf_i01 = dst_ddf_i + i01*ne00; + + // compute + mul_f32_cuda(src0_ddf_i01, src1_ddf_i01, dst_ddf_i01, ne00, ne10, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + } + + (void) dst; + (void) src0_ddq_i; + (void) i02; +} + +inline void ggml_cuda_op_silu( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const int64_t ne00 = src0->ne[0]; + const int64_t i01_diff = i01_high - i01_low; + + // compute + silu_f32_cuda(src0_ddf_i, dst_ddf_i, ne00*i01_diff, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + + (void) src1; + (void) dst; + (void) src0_ddq_i; + (void) src1_ddf_i; + (void) i02; + (void) i1; +} + +inline void ggml_cuda_op_rms_norm( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const int64_t ne00 = src0->ne[0]; + const int64_t i01_diff = i01_high - i01_low; + + // compute + rms_norm_f32_cuda(src0_ddf_i, dst_ddf_i, ne00, i01_diff, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + + (void) src1; + (void) dst; + (void) src0_ddq_i; + (void) src1_ddf_i; + (void) i02; + (void) i1; +} + +inline void ggml_cuda_op_dequantize_mul_mat_vec( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddq_i != nullptr); + GGML_ASSERT(src1_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const int64_t ne00 = src0->ne[0]; + const int64_t nrows = i01_high - i01_low; + + switch (src0->type) { + case GGML_TYPE_Q4_0: + dequantize_mul_mat_vec_q4_0_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q4_1: + dequantize_mul_mat_vec_q4_1_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q5_0: + dequantize_mul_mat_vec_q5_0_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q5_1: + dequantize_mul_mat_vec_q5_1_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q8_0: + dequantize_mul_mat_vec_q8_0_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q2_K: + dequantize_mul_mat_vec_q2_K_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q3_K: + dequantize_mul_mat_vec_q3_K_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q4_K: + dequantize_mul_mat_vec_q4_K_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q5_K: + dequantize_mul_mat_vec_q5_K_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_Q6_K: + dequantize_mul_mat_vec_q6_K_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + case GGML_TYPE_F16: + convert_mul_mat_vec_f16_cuda(src0_ddq_i, src1_ddf_i, dst_ddf_i, ne00, nrows, cudaStream_main); + break; + default: + GGML_ASSERT(false); + break; + } + CUDA_CHECK(cudaGetLastError()); + + (void) src1; + (void) dst; + (void) src0_ddf_i; + (void) i02; + (void) i1; +} + +inline void ggml_cuda_op_mul_mat_cublas( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(src1_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const float alpha = 1.0f; + const float beta = 0.0f; + + const int64_t ne00 = src0->ne[0]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + + const int64_t ne0 = dst->ne[0]; + const int64_t i01_diff = i01_high - i01_low; + + int id; + CUDA_CHECK(cudaGetDevice(&id)); + + // the main device has a larger memory buffer to hold the results from all GPUs + // ldc == nrows of the matrix that cuBLAS writes into + int ldc = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : i01_diff; + + CUBLAS_CHECK(cublasSetStream(g_cublas_handles[id], cudaStream_main)); + CUBLAS_CHECK( + cublasSgemm(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N, + i01_diff, ne11, ne10, + &alpha, src0_ddf_i, ne00, + src1_ddf_i, ne10, + &beta, dst_ddf_i, ldc)); + + (void) dst; + (void) src0_ddq_i; + (void) i02; + (void) i1; +} + +inline void ggml_cuda_op_rope( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const int64_t ne00 = src0->ne[0]; + const int64_t i01_diff = i01_high - i01_low; + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_dims = ((int32_t *) src1->data)[1]; + const int mode = ((int32_t *) src1->data)[2]; + GGML_ASSERT(mode == 0); + + const float theta_scale = powf(10000.0, -2.0f/n_dims); + const float p = ((mode & 1) == 0 ? n_past + i02 : i02); + + // compute + rope_f32_cuda(src0_ddf_i, dst_ddf_i, ne00, i01_diff, p, theta_scale, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + + (void) dst; + (void) src0_ddq_i; + (void) src1_ddf_i; + (void) i1; +} + +inline void ggml_cuda_op_diag_mask_inf( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t i01_diff = i01_high - i01_low; + + const int n_past = ((int32_t *) src1->data)[0]; + + // compute + diag_mask_inf_f32_cuda(src0_ddf_i, dst_ddf_i, ne00, i01_diff, ne01, n_past, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + + (void) dst; + (void) src0_ddq_i; + (void) src1_ddf_i; + (void) i02; + (void) i1; +} + +inline void ggml_cuda_op_soft_max( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const int64_t ne00 = src0->ne[0]; + const int64_t i01_diff = i01_high - i01_low; + + // compute + soft_max_f32_cuda(src0_ddf_i, dst_ddf_i, ne00, i01_diff, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + + (void) src1; + (void) dst; + (void) src0_ddq_i; + (void) src1_ddf_i; + (void) i02; + (void) i1; +} + +inline void ggml_cuda_op_scale( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i, + float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i02, int64_t i01_low, int64_t i01_high, int i1, + cudaStream_t & cudaStream_main){ + + GGML_ASSERT(src0_ddf_i != nullptr); + GGML_ASSERT(dst_ddf_i != nullptr); + + const float scale = ((float *) src1->data)[0]; + + const int64_t ne00 = src0->ne[0]; + const int64_t i01_diff = i01_high - i01_low; + + // compute + scale_f32_cuda(src0_ddf_i, dst_ddf_i, scale, ne00*i01_diff, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + + (void) src1; + (void) dst; + (void) src0_ddq_i; + (void) src1_ddf_i; + (void) i02; + (void) i1; +} + +static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, + ggml_cuda_op_t op, bool src0_needs_f32, bool flatten_rows) { + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + const int64_t nrows0 = ggml_nrows(src0); + + const bool use_src1 = src1 != nullptr; + const int64_t ne10 = use_src1 ? src1->ne[0] : 1; + const int64_t ne11 = use_src1 ? src1->ne[1] : 1; + const int64_t ne12 = use_src1 ? src1->ne[2] : 1; + const int64_t ne13 = use_src1 ? src1->ne[3] : 1; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + GGML_ASSERT(dst->backend != GGML_BACKEND_GPU_SPLIT); + GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_GPU_SPLIT); + + // strides for iteration over dims 3 and 2 + const int64_t num_iters = flatten_rows ? 1 : ne02 * ne03; + const int64_t stride_mod = flatten_rows ? ne02 * ne03 : 1; + const int64_t src0_stride = ne00 * ne01 * stride_mod; + const int64_t src1_stride = ne10 * ne11 * stride_mod; + const int64_t dst_stride = ne0 * ne1 * stride_mod; + + const size_t src0_ts = ggml_type_size(src0->type); + const size_t src0_bs = ggml_blck_size(src0->type); + + struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra; + struct ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr; + struct ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; + + const bool src0_on_device = src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT; + const bool src0_is_contiguous = ggml_is_contiguous(src0); + const bool src0_is_f32 = src0->type == GGML_TYPE_F32; + + const bool src1_is_contiguous = use_src1 && ggml_is_contiguous(src1); + const bool src1_stays_on_host = use_src1 && ( + dst->op == GGML_OP_SCALE || dst->op == GGML_OP_DIAG_MASK_INF || dst->op == GGML_OP_ROPE); + + const bool split = src0->backend == GGML_BACKEND_GPU_SPLIT; + + const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(src0->type); + + // dd = data device + char * src0_ddq[GGML_CUDA_MAX_DEVICES] = {nullptr}; // quantized + float * src0_ddf[GGML_CUDA_MAX_DEVICES] = {nullptr}; // float + float * src1_ddf[GGML_CUDA_MAX_DEVICES] = {nullptr}; + float * dst_ddf[GGML_CUDA_MAX_DEVICES] = {nullptr}; + + // asq = actual size quantized, asf = actual size float + size_t src0_asq[GGML_CUDA_MAX_DEVICES] = {0}; + size_t src0_asf[GGML_CUDA_MAX_DEVICES] = {0}; + size_t src1_asf[GGML_CUDA_MAX_DEVICES] = {0}; + size_t dst_asf[GGML_CUDA_MAX_DEVICES] = {0}; + + // if multiple GPUs are used they need to wait for the main GPU to finish + if (split && g_device_count > 1) { + CUDA_CHECK(cudaSetDevice(g_main_device)); + CUDA_CHECK(cudaDeviceSynchronize()); + } + + for (int id = 0; id < g_device_count; ++id) { + if (!split && id != g_main_device) { + continue; + } + + const bool src1_on_device = use_src1 && src1->backend == GGML_BACKEND_GPU && id == g_main_device; + const bool dst_on_device = dst->backend == GGML_BACKEND_GPU && id == g_main_device; + + int64_t row_low, row_high; + if (split) { + row_low = id == 0 ? 0 : nrows0*g_tensor_split[id]; + row_high = id == g_device_count - 1 ? nrows0 : nrows0*g_tensor_split[id + 1]; + } else { + row_low = 0; + row_high = nrows0; + } + if (row_low == row_high) { + continue; + } + + int64_t row_diff = row_high - row_low; + + cudaSetDevice(id); + + if (src0_on_device && src0_is_contiguous) { + if (src0_is_f32) { + src0_ddf[id] = (float *) src0_extra->data_device[id]; + } else { + src0_ddq[id] = (char *) src0_extra->data_device[id]; + } + } else { + if (src0_is_f32) { + src0_ddf[id] = (float *) ggml_cuda_pool_malloc(row_diff*ne00 * sizeof(float), &src0_asf[id]); + } else { + src0_ddq[id] = (char *) ggml_cuda_pool_malloc(row_diff*ne00 * src0_ts/src0_bs, &src0_asq[id]); + } + } + + if (src0_needs_f32 && !src0_is_f32) { + src0_ddf[id] = (float *) ggml_cuda_pool_malloc(row_diff*ne00 * sizeof(float), &src0_asf[id]); + } + + if (use_src1 && !src1_stays_on_host) { + if (src1_on_device && src1_is_contiguous) { + src1_ddf[id] = (float *) src1_extra->data_device[id]; + } else { + src1_ddf[id] = (float *) ggml_cuda_pool_malloc(num_iters*src1_stride * sizeof(float), &src1_asf[id]); + } + } + if (dst_on_device) { + dst_ddf[id] = (float *) dst_extra->data_device[id]; + } else { + size_t size_dst_ddf = split ? row_diff*ne1 * sizeof(float) : num_iters*dst_stride * sizeof(float); + dst_ddf[id] = (float *) ggml_cuda_pool_malloc(size_dst_ddf, &dst_asf[id]); + } + + const int64_t i03_max = flatten_rows ? 1 : ne03; + const int64_t i02_max = flatten_rows ? 1 : ne02; + const int64_t rows_per_iter = flatten_rows ? nrows0 : ne01; + + for (int64_t i03 = 0; i03 < i03_max; i03++) { + const int64_t i13 = i03 % ne13; + for (int64_t i02 = 0; i02 < i02_max; i02++) { + const int64_t i12 = i02 % ne12; + + const int64_t i0 = i03*ne02 + i02; + + // i0 values that contain the lower/upper rows for a split tensor when using multiple GPUs + const int64_t i0_offset_low = row_low/rows_per_iter; + const int64_t i0_offset_high = row_high/rows_per_iter; + + int64_t i01_low = 0; + int64_t i01_high = rows_per_iter; + if (split) { + if (i0 < i0_offset_low || i0 > i0_offset_high) { + continue; + } + if (i0 == i0_offset_low) { + i01_low = row_low % rows_per_iter; + } + if (i0 == i0_offset_high) { + i01_high = row_high % rows_per_iter; + } + } + + // There is possibly a bug in the Windows nvcc compiler regarding instruction reordering or optimizing out local variables. + // Removing the first assert or changing the order of the arguments causes the second assert to fail. + // Removing both asserts results in i01_high becoming 0 which in turn results in garbage output. + // The root cause seems to be a problem with i0_offset_high becoming 0 when it should always be >0 (for single GPU). + GGML_ASSERT(i01_low == 0 || g_device_count > 1); + GGML_ASSERT(i01_high == rows_per_iter || g_device_count > 1); + + const int64_t i01_diff = i01_high - i01_low; + if (i01_diff == 0) { + continue; + } + const int64_t i11 = i13*ne12 + i12; + + cudaStream_t cudaStream_main = g_cudaStreams_main[id]; + + // for split tensors the data begins at i0 == i0_offset_low + char * src0_ddq_i = src0_ddq[id] + (i0 - i0_offset_low)*src0_stride*src0_ts/src0_bs; + float * src0_ddf_i = src0_ddf[id] + (i0 - i0_offset_low)*src0_stride; + float * src1_ddf_i = src1_ddf[id] + i11*src1_stride; + float * dst_ddf_i = dst_ddf[id] + (i0 - i0_offset_low)*dst_stride; + + // for split tensors the data pointer needs to be rounded down + // to the bin edge for i03, i02 bins beyond the first + if (i0 - i0_offset_low > 0) { + GGML_ASSERT(!flatten_rows); + src0_ddq_i -= (row_low % ne01)*ne00 * src0_ts/src0_bs; + src0_ddf_i -= (row_low % ne01)*ne00; + dst_ddf_i -= (row_low % ne0)*ne1; + } + + // the main device memory buffer can be on VRAM scratch, with space for all partial results + // in that case an offset on dst_ddf_i is needed + if (dst->backend == GGML_BACKEND_GPU && id == g_main_device) { + dst_ddf_i += i01_low; // offset is 0 if no tensor split + } + + // copy src0, src1 to device if necessary + if (use_src1 && !src1_stays_on_host) { + if (src1->backend == GGML_BACKEND_CPU) { + GGML_ASSERT(!flatten_rows || nrows0 == ggml_nrows(src1)); + int64_t nrows1 = flatten_rows ? nrows0 : ne11; + CUDA_CHECK(ggml_cuda_cpy_tensor_2d(src1_ddf_i, src1, i03, i02, 0, nrows1, cudaStream_main)); + } else if (src1->backend == GGML_BACKEND_GPU && src1_is_contiguous) { + if (id != g_main_device) { + GGML_ASSERT(!flatten_rows); + float * src1_ddf_i_source = (float *) src1_extra->data_device[g_main_device]; + src1_ddf_i_source += i11*src1_stride; + CUDA_CHECK(cudaMemcpyAsync(src1_ddf_i, src1_ddf_i_source, src1_stride*sizeof(float), + cudaMemcpyDeviceToDevice, cudaStream_main)); + } + } else if (src1_on_device && !src1_is_contiguous) { + GGML_ASSERT(!split); + CUDA_CHECK(ggml_cuda_cpy_tensor_2d(src1_ddf_i, src1, i03, i02, 0, ne11, cudaStream_main)); + } else { + GGML_ASSERT(false); + } + } + + if (!src0_on_device || !src0_is_contiguous) { + if (src0_is_f32) { + CUDA_CHECK(ggml_cuda_cpy_tensor_2d(src0_ddf_i, src0, i03, i02, i01_low, i01_high, cudaStream_main)); + } else { + CUDA_CHECK(ggml_cuda_cpy_tensor_2d(src0_ddq_i, src0, i03, i02, i01_low, i01_high, cudaStream_main)); + } + } + + // convert src0 to f32 if it is necessary for the ggml_cuda_op + if (src0_needs_f32 && !src0_is_f32) { + to_fp32_cuda(src0_ddq_i, src0_ddf_i, i01_diff*ne00, cudaStream_main); + CUDA_CHECK(cudaGetLastError()); + } + + // do the computation + op(src0, src1, dst, src0_ddq_i, src0_ddf_i, src1_ddf_i, dst_ddf_i, i02, i01_low, i01_high, i11, cudaStream_main); + + // copy dst to host or other device if necessary + if (!dst_on_device) { + void * dst_off_device; + cudaMemcpyKind kind; + if (dst->backend == GGML_BACKEND_CPU) { + dst_off_device = dst->data; + kind = cudaMemcpyDeviceToHost; + } else if (dst->backend == GGML_BACKEND_GPU) { + dst_off_device = dst_extra->data_device[g_main_device]; + kind = cudaMemcpyDeviceToDevice; + } else { + GGML_ASSERT(false); + } + if (split) { + // src0 = weight matrix is saved as a transposed matrix for better memory layout. + // dst is NOT transposed. + // The outputs of cuBLAS matrix matrix multiplications can therefore NOT simply be concatenated for >1 GPU. + // Instead they need to be copied to the correct slice in ne0 = dst row index. + // If dst is a vector with ne0 == 1 then you don't have to do this but it still produces correct results. + for (int64_t j = 0; j < ne1; ++j) { + float * dhf_dst_i = (float *) ((char *) dst_off_device + (j*ne0 + i01_low)*sizeof(float) + i02*nb2 + i03*nb3); + CUDA_CHECK(cudaMemcpyAsync(dhf_dst_i, dst_ddf_i + j*i01_diff, i01_diff*sizeof(float), kind, cudaStream_main)); + } + } else { + float * dhf_dst_i = (float *) ((char *) dst_off_device + i02*nb2 + i03*nb3); + CUDA_CHECK(cudaMemcpyAsync(dhf_dst_i, dst_ddf_i, dst_stride*sizeof(float), kind, cudaStream_main)); + } + } + } + } + } + + // wait until each device is finished, then free their buffers + for (int id = 0; id < g_device_count; ++id) { + if (src0_asq[id] == 0 && src0_asf[id] == 0 && src1_asf[id] == 0 && dst_asf[id] == 0) { + continue; + } + + CUDA_CHECK(cudaSetDevice(id)); + CUDA_CHECK(cudaDeviceSynchronize()); + + if (src0_asq[id] > 0) { + ggml_cuda_pool_free(src0_ddq[id], src0_asq[id]); + } + if (src0_asf[id] > 0) { + ggml_cuda_pool_free(src0_ddf[id], src0_asf[id]); + } + if (src1_asf[id] > 0) { + ggml_cuda_pool_free(src1_ddf[id], src1_asf[id]); + } + if (dst_asf[id] > 0) { + ggml_cuda_pool_free(dst_ddf[id], dst_asf[id]); + } + } +} + +void ggml_cuda_add(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_add, true, true); +} + +void ggml_cuda_mul(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_mul, true, false); // TODO ggml_cuda_op needs modification for flatten +} + +void ggml_cuda_silu(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_silu, true, true); +} + +void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_rms_norm, true, true); +} + +bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { + const int64_t ne10 = src1->ne[0]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + + // TODO: find the optimal values for these + if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && + src1->type == GGML_TYPE_F32 && + dst->type == GGML_TYPE_F32 && + (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) { + return true; + } + + return false; +} + +void ggml_cuda_mul_mat_vec_p021(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst){ + GGML_ASSERT(ggml_is_permuted(src0) && ggml_is_permuted(src1)); + GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT); + GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation + GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + + CUDA_CHECK(cudaSetDevice(g_main_device)); + cudaStream_t cudaStream_main = g_cudaStreams_main[g_main_device]; + + struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra; + void * src0_ddq = src0_extra->data_device[g_main_device]; + + struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra; + float * src1_ddf = (float *) src1_extra->data_device[g_main_device]; + + struct ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; + float * dst_ddf = (float *) dst_extra->data_device[g_main_device]; + + ggml_mul_mat_p021_f16_f32_cuda(src0_ddq, src1_ddf, dst_ddf, ne00, ne01, ne02, cudaStream_main); +} + +void ggml_cuda_mul_mat_vec_nc(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst){ + GGML_ASSERT(!ggml_is_contiguous(src0) && ggml_is_contiguous(src1)); + GGML_ASSERT(!ggml_is_permuted(src0)); + GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT); + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + + const int64_t nb01 = src0->nb[1]; + const int64_t nb02 = src0->nb[2]; + + CUDA_CHECK(cudaSetDevice(g_main_device)); + cudaStream_t cudaStream_main = g_cudaStreams_main[g_main_device]; + + struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra; + void * src0_ddq = src0_extra->data_device[g_main_device]; + + struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra; + float * src1_ddf = (float *) src1_extra->data_device[g_main_device]; + + struct ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra; + float * dst_ddf = (float *) dst_extra->data_device[g_main_device]; + + const int row_stride_x = nb01 / sizeof(half); + const int channel_stride_x = nb02 / sizeof(half); + + ggml_mul_mat_vec_nc_f16_f32_cuda(src0_ddq, src1_ddf, dst_ddf, ne00, ne01, row_stride_x, ne02, channel_stride_x, cudaStream_main); +} + +void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + bool all_on_device = (src0->backend == GGML_BACKEND_GPU || src0->backend == GGML_BACKEND_GPU_SPLIT) && + src1->backend == GGML_BACKEND_GPU && dst->backend == GGML_BACKEND_GPU; + + if (all_on_device && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) { + ggml_cuda_mul_mat_vec_p021(src0, src1, dst); + } else if (all_on_device && !ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && src1->ne[1] == 1) { + ggml_cuda_mul_mat_vec_nc(src0, src1, dst); + }else if (src0->type == GGML_TYPE_F32) { + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_mul_mat_cublas, true, false); + } else if (ggml_is_quantized(src0->type) || src0->type == GGML_TYPE_F16) { + if (src1->ne[1] == 1 && src0->ne[0] % GGML_CUDA_DMMV_X == 0 && src0->ne[1] % GGML_CUDA_DMMV_Y == 0) { + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_dequantize_mul_mat_vec, false, false); + } else { + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_mul_mat_cublas, true, false); + } + } else { + GGML_ASSERT(false); + } +} + +void ggml_cuda_scale(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_scale, true, true); +} + +void ggml_cuda_cpy(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + const int64_t ne = ggml_nelements(src0); + GGML_ASSERT(ne == ggml_nelements(src1)); + + GGML_ASSERT(src0->backend == GGML_BACKEND_GPU); + GGML_ASSERT(src1->backend == GGML_BACKEND_GPU); + + GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX); + GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + GGML_ASSERT(src0->ne[3] == 1); + + const int64_t nb00 = src0->nb[0]; + const int64_t nb01 = src0->nb[1]; + const int64_t nb02 = src0->nb[2]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + GGML_ASSERT(src1->ne[3] == 1); + + const int64_t nb10 = src1->nb[0]; + const int64_t nb11 = src1->nb[1]; + const int64_t nb12 = src1->nb[2]; + + CUDA_CHECK(cudaSetDevice(g_main_device)); + cudaStream_t cudaStream_main = g_cudaStreams_main[g_main_device]; + + const struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra; + const struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra; + + char * src0_ddc = (char *) src0_extra->data_device[g_main_device]; + char * src1_ddc = (char *) src1_extra->data_device[g_main_device]; + + if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) { + ggml_cpy_f32_f32_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, + ne10, ne11, nb10, nb11, nb12, cudaStream_main); + } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) { + ggml_cpy_f32_f16_cuda(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, + ne10, ne11, nb10, nb11, nb12, cudaStream_main); + } else { + GGML_ASSERT(false); + } + + (void) dst; +} + +void ggml_cuda_diag_mask_inf(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_diag_mask_inf, true, true); +} + +void ggml_cuda_soft_max(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_soft_max, true, true); +} + +void ggml_cuda_rope(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + ggml_cuda_op(src0, src1, dst, ggml_cuda_op_rope, true, false); // FIXME flatten changes results +} + +void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + (void) src0; + (void) src1; + (void) dst; +} + +void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) { + int nrows = ggml_nrows(tensor); + const size_t nb1 = tensor->nb[1]; + ggml_backend backend = tensor->backend; + struct ggml_tensor_extra_gpu * extra = new struct ggml_tensor_extra_gpu; + memset(extra, 0, sizeof(*extra)); + + for (int id = 0; id < g_device_count; ++id) { + if (backend == GGML_BACKEND_GPU && id != g_main_device) { + continue; + } + + cudaSetDevice(id); + + int row_low, row_high; + if (backend == GGML_BACKEND_GPU) { + row_low = 0; + row_high = nrows; + } else if (backend == GGML_BACKEND_GPU_SPLIT) { + row_low = id == 0 ? 0 : nrows*g_tensor_split[id]; + row_high = id == g_device_count - 1 ? nrows : nrows*g_tensor_split[id + 1]; + } else { + GGML_ASSERT(false); + } + if (row_low == row_high) { + continue; + } + + int64_t nrows_split = row_high - row_low; + + const size_t offset_split = row_low*nb1; + const size_t size = ggml_nbytes_split(tensor, nrows_split); + + void * buf; + CUDA_CHECK(cudaMalloc(&buf, size)); + void * buf_host = (char*)data + offset_split; + + cudaMemcpy(buf, buf_host, size, cudaMemcpyHostToDevice); + + extra->data_device[id] = buf; + } + + tensor->extra = extra; +} + +void ggml_cuda_free_data(struct ggml_tensor * tensor) { + if (tensor->backend != GGML_BACKEND_GPU && tensor->backend != GGML_BACKEND_GPU_SPLIT) { + return; + } + + ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra; + + for (int id = 0; id < g_device_count; ++id) { + if (extra->data_device[id] == nullptr) { + continue; + } + + CUDA_CHECK(cudaSetDevice(id)); + CUDA_CHECK(cudaFree(extra->data_device[id])); + } + + delete extra; +} + +void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch) { + if (scratch && g_scratch_size == 0) { + return; + } + + // recursively assign CUDA buffers until a compute tensor is found + if (tensor->src0 != nullptr && tensor->src0->backend == GGML_BACKEND_CPU) { + const ggml_op src0_op = tensor->src0->op; + if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW) { + ggml_cuda_assign_buffers_impl(tensor->src0, scratch); + } + } + if (tensor->op == GGML_OP_CPY && tensor->src1->backend == GGML_BACKEND_CPU) { + ggml_cuda_assign_buffers_impl(tensor->src1, scratch); + } + + tensor->backend = GGML_BACKEND_GPU; + struct ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu; + + const bool inplace = (tensor->src0 != nullptr && tensor->src0->data == tensor->data) || + tensor->op == GGML_OP_VIEW; + const size_t size = ggml_nbytes(tensor); + + CUDA_CHECK(cudaSetDevice(g_main_device)); + if (inplace && tensor->src0->backend == GGML_BACKEND_GPU) { + struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src0->extra; + char * src0_ddc = (char *) src0_extra->data_device[g_main_device]; + size_t offset = 0; + if (tensor->op == GGML_OP_VIEW) { + memcpy(&offset, tensor->opt[0]->data, sizeof(size_t)); + } + extra->data_device[g_main_device] = src0_ddc + offset; + } else if (tensor->op == GGML_OP_CPY) { + struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu * ) tensor->src1->extra; + void * src1_ddv = src1_extra->data_device[g_main_device]; + extra->data_device[g_main_device] = src1_ddv; + } else if (scratch) { + GGML_ASSERT(size <= g_scratch_size); + if (g_scratch_offset + size > g_scratch_size) { + g_scratch_offset = 0; + } + + char * data = (char *) g_scratch_buffer; + if (data == nullptr) { + CUDA_CHECK(cudaMalloc(&data, g_scratch_size)); + g_scratch_buffer = data; + } + extra->data_device[g_main_device] = data + g_scratch_offset; + + g_scratch_offset += size; + + GGML_ASSERT(g_scratch_offset <= g_scratch_size); + } else { // allocate new buffers outside of scratch + void * data; + CUDA_CHECK(cudaMalloc(&data, size)); + CUDA_CHECK(cudaMemset(data, 0, size)); + extra->data_device[g_main_device] = data; + } + + tensor->extra = extra; +} + +void ggml_cuda_assign_buffers(struct ggml_tensor * tensor) { + ggml_cuda_assign_buffers_impl(tensor, true); +} + +void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor) { + ggml_cuda_assign_buffers_impl(tensor, false); +} + +void ggml_cuda_set_main_device(int main_device) { + if (main_device >= g_device_count) { + fprintf(stderr, "warning: cannot set main_device=%d because there are only %d devices. Using device %d instead.\n", + main_device, g_device_count, g_main_device); + return; + } + g_main_device = main_device; + if (g_device_count > 1) { + cudaDeviceProp prop; + CUDA_CHECK(cudaGetDeviceProperties(&prop, g_main_device)); + fprintf(stderr, "%s: using device %d (%s) as main device\n", __func__, g_main_device, prop.name); + } +} + +void ggml_cuda_set_scratch_size(size_t scratch_size) { + g_scratch_size = scratch_size; +} + +void ggml_cuda_free_scratch() { + if (g_scratch_buffer == nullptr) { + return; + } + + CUDA_CHECK(cudaFree(g_scratch_buffer)); + g_scratch_buffer = nullptr; +} + +bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){ + ggml_cuda_func_t func; + const bool any_on_device = tensor->backend == GGML_BACKEND_GPU + || tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT + || (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU); + + switch (tensor->op) { + case GGML_OP_ADD: + if (!any_on_device) { + return false; + } + func = ggml_cuda_add; + break; + case GGML_OP_MUL: + if (!any_on_device) { + return false; + } + func = ggml_cuda_mul; + break; + case GGML_OP_SILU: + if (!any_on_device) { + return false; + } + func = ggml_cuda_silu; + break; + case GGML_OP_RMS_NORM: + if (!any_on_device) { + return false; + } + func = ggml_cuda_rms_norm; + break; + case GGML_OP_MUL_MAT: + if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src0, tensor->src1, tensor)) { + return false; + } + func = ggml_cuda_mul_mat; + break; + case GGML_OP_SCALE: + if (!any_on_device) { + return false; + } + func = ggml_cuda_scale; + break; + case GGML_OP_CPY: + if (!any_on_device) { + return false; + } + func = ggml_cuda_cpy; + break; + case GGML_OP_RESHAPE: + case GGML_OP_VIEW: + case GGML_OP_PERMUTE: + case GGML_OP_TRANSPOSE: + if (!any_on_device) { + return false; + } + func = ggml_cuda_nop; + break; + case GGML_OP_DIAG_MASK_INF: + if (!any_on_device) { + return false; + } + func = ggml_cuda_diag_mask_inf; + break; + case GGML_OP_SOFT_MAX: + if (!any_on_device) { + return false; + } + func = ggml_cuda_soft_max; + break; + case GGML_OP_ROPE: + if (!any_on_device) { + return false; + } + func = ggml_cuda_rope; + break; + default: + return false; + } + + if (params->ith != 0) { + return true; + } + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return true; + } + func(tensor->src0, tensor->src1, tensor); + return true; +} diff --git a/ctransformers/models/ggml/ggml-cuda.h b/ctransformers/models/ggml/ggml-cuda.h new file mode 100644 index 0000000000000000000000000000000000000000..d32b4484267ab5a3f9028db07330302432c1fd93 --- /dev/null +++ b/ctransformers/models/ggml/ggml-cuda.h @@ -0,0 +1,39 @@ +#pragma once + +#include "ggml.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define GGML_CUDA_MAX_DEVICES 16 + +struct ggml_tensor_extra_gpu { + void * data_device[GGML_CUDA_MAX_DEVICES]; // 1 pointer for each device for split tensors +}; + +void ggml_init_cublas(void); +void ggml_cuda_set_tensor_split(const float * tensor_split); + +void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); +bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); +size_t ggml_cuda_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst); +void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize); + +// TODO: export these with GGML_API +void * ggml_cuda_host_malloc(size_t size); +void ggml_cuda_host_free(void * ptr); + +void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor); + +void ggml_cuda_free_data(struct ggml_tensor * tensor); +void ggml_cuda_assign_buffers(struct ggml_tensor * tensor); +void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor); +void ggml_cuda_set_main_device(int main_device); +void ggml_cuda_set_scratch_size(size_t scratch_size); +void ggml_cuda_free_scratch(void); +bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor); + +#ifdef __cplusplus +} +#endif diff --git a/ctransformers/models/ggml/ggml-ggllm.c b/ctransformers/models/ggml/ggml-ggllm.c new file mode 100644 index 0000000000000000000000000000000000000000..9fb397262b27832871d48bd993ff1cfa9b46681f --- /dev/null +++ b/ctransformers/models/ggml/ggml-ggllm.c @@ -0,0 +1,108 @@ +// https://github.com/cmp-nct/ggllm.cpp/blob/master/ggml.c + +// ggml_repeat2 + +struct ggml_tensor *ggml_repeat2(struct ggml_context *ctx, + struct ggml_tensor *a, struct ggml_tensor *b) { + GGML_ASSERT(ggml_can_repeat(a, b)); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + if (ggml_are_same_shape(a, b) && !is_node) { + return a; + } + + struct ggml_tensor *result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne); + + result->op = GGML_OP_REPEAT2; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_compute_forward_repeat2 + +static void ggml_compute_forward_repeat2_f32( + const struct ggml_compute_params *params, const struct ggml_tensor *src0, + struct ggml_tensor *dst) { + GGML_ASSERT(params->ith == 0); + GGML_ASSERT(ggml_can_repeat(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + // guaranteed to be an integer due to the check in ggml_can_repeat + const int nr0 = (int)(ne0 / ne00); + const int nr1 = (int)(ne1 / ne01); + const int nr2 = (int)(ne2 / ne02); + const int nr3 = (int)(ne3 / ne03); + + // TODO: support for transposed / permuted tensors + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + + int i2k2 = 0; + + // TODO: maybe this is not optimal? + for (int i3 = 0; i3 < nr3; i3++) { + for (int k3 = 0; k3 < ne03; k3++, i2k2 = 0) { + for (int i2 = 0; i2 < nr2; i2++) { + for (int k2 = 0; k2 < ne02; k2++, i2k2++) { + for (int i1 = 0; i1 < nr1; i1++) { + for (int k1 = 0; k1 < ne01; k1++) { + for (int i0 = 0; i0 < nr0; i0++) { + ggml_vec_cpy_f32( + ne00, + (float *)((char *)dst->data + (i3 * ne03 + k3) * nb3 + + (i2 * ne02 + k2) * nb2 + (i1 * ne01 + k1) * nb1 + + (i0 * ne00) * nb0), + (float *)((char *)src0->data + (k3)*nb03 + + (i2k2 / nr2) * nb02 + (k1)*nb01)); + } + } + } + } + } + } + } +} + +static void ggml_compute_forward_repeat2( + const struct ggml_compute_params *params, const struct ggml_tensor *src0, + struct ggml_tensor *dst) { + switch (src0->type) { + case GGML_TYPE_F32: { + ggml_compute_forward_repeat2_f32(params, src0, dst); + } break; + default: { + GGML_ASSERT(false); + } break; + } +} diff --git a/ctransformers/models/ggml/ggml-ggllm.h b/ctransformers/models/ggml/ggml-ggllm.h new file mode 100644 index 0000000000000000000000000000000000000000..8a7e9ee3584a2bc25bb8f87b3f6182965e1a4fc9 --- /dev/null +++ b/ctransformers/models/ggml/ggml-ggllm.h @@ -0,0 +1,5 @@ +// https://github.com/cmp-nct/ggllm.cpp/blob/master/ggml.h + +GGML_API struct ggml_tensor* ggml_repeat2(struct ggml_context* ctx, + struct ggml_tensor* a, + struct ggml_tensor* b); diff --git a/ctransformers/models/ggml/ggml.c b/ctransformers/models/ggml/ggml.c new file mode 100644 index 0000000000000000000000000000000000000000..9d99f0a27bf2189295ebaf8619949e43ea60d1a5 --- /dev/null +++ b/ctransformers/models/ggml/ggml.c @@ -0,0 +1,18084 @@ +// Defines CLOCK_MONOTONIC on Linux +#define _GNU_SOURCE + +#include "ggml.h" + +#ifdef GGML_USE_K_QUANTS +#include "k_quants.h" +#endif + +#if defined(_MSC_VER) || defined(__MINGW32__) +#include // using malloc.h with MSC/MINGW +#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef GGML_USE_METAL +#include +#endif + +// if C99 - static_assert is noop +// ref: https://stackoverflow.com/a/53923785/4039976 +#ifndef static_assert +#define static_assert(cond, msg) struct global_scope_noop_trick +#endif + +#if defined(_MSC_VER) +// disable "possible loss of data" to avoid hundreds of casts +// we should just be careful :) +#pragma warning(disable: 4244 4267) +#endif + +#if defined(_WIN32) + +#include + +typedef volatile LONG atomic_int; +typedef atomic_int atomic_bool; + +static void atomic_store(atomic_int* ptr, LONG val) { + InterlockedExchange(ptr, val); +} +static LONG atomic_load(atomic_int* ptr) { + return InterlockedCompareExchange(ptr, 0, 0); +} +static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) { + return InterlockedExchangeAdd(ptr, inc); +} +static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) { + return atomic_fetch_add(ptr, -(dec)); +} + +typedef HANDLE pthread_t; + +typedef DWORD thread_ret_t; +static int pthread_create(pthread_t* out, void* unused, thread_ret_t(*func)(void*), void* arg) { + (void) unused; + HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, arg, 0, NULL); + if (handle == NULL) + { + return EAGAIN; + } + + *out = handle; + return 0; +} + +static int pthread_join(pthread_t thread, void* unused) { + (void) unused; + return (int) WaitForSingleObject(thread, INFINITE); +} + +static int sched_yield (void) { + Sleep (0); + return 0; +} +#else +#include +#include + +typedef void* thread_ret_t; +#endif + +// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512 +#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__)) +#ifndef __FMA__ +#define __FMA__ +#endif +#ifndef __F16C__ +#define __F16C__ +#endif +#ifndef __SSE3__ +#define __SSE3__ +#endif +#endif + +#ifdef __HAIKU__ +#define static_assert(cond, msg) _Static_assert(cond, msg) +#endif + +/*#define GGML_PERF*/ +#define GGML_DEBUG 0 +#define GGML_GELU_FP16 +#define GGML_SILU_FP16 + +#define GGML_SOFT_MAX_UNROLL 4 +#define GGML_VEC_DOT_UNROLL 2 + +#ifdef GGML_USE_ACCELERATE +// uncomment to use vDSP for soft max computation +// note: not sure if it is actually faster +//#define GGML_SOFT_MAX_ACCELERATE +#endif + +#if UINTPTR_MAX == 0xFFFFFFFF + #define GGML_MEM_ALIGN 4 +#else + #define GGML_MEM_ALIGN 16 +#endif + +#if defined(_MSC_VER) || defined(__MINGW32__) +#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) +#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr) +#else +inline static void* ggml_aligned_malloc(size_t size) { + void* aligned_memory = NULL; +#ifdef GGML_USE_METAL + int result = posix_memalign(&aligned_memory, getpagesize(), size); +#else + int result = posix_memalign(&aligned_memory, GGML_MEM_ALIGN, size); +#endif + if (result != 0) { + // Handle allocation failure + return NULL; + } + return aligned_memory; +} +#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) +#define GGML_ALIGNED_FREE(ptr) free(ptr) +#endif + +#define UNUSED(x) (void)(x) +#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0) + +#if defined(GGML_USE_ACCELERATE) +#include +#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions +#include "ggml-opencl.h" +#endif +#elif defined(GGML_USE_OPENBLAS) +#include +#elif defined(GGML_USE_CUBLAS) +#include "ggml-cuda.h" +#elif defined(GGML_USE_CLBLAST) +#include "ggml-opencl.h" +#endif + +#undef MIN +#undef MAX +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +// floating point type used to accumulate sums +typedef double ggml_float; + +// 16-bit float +// on Arm, we use __fp16 +// on x86, we use uint16_t +#ifdef __ARM_NEON + +// if YCM cannot find , make a symbolic link to it, for example: +// +// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ +// +#include + +#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x)) +#define GGML_COMPUTE_FP32_TO_FP16(x) (x) + +#define GGML_FP16_TO_FP32(x) ((float) (x)) +#define GGML_FP32_TO_FP16(x) (x) + +#else + +#ifdef __wasm_simd128__ +#include +#else +#ifdef __POWER9_VECTOR__ +#include +#undef bool +#define bool _Bool +#else +#if defined(_MSC_VER) || defined(__MINGW32__) +#include +#else +#if !defined(__riscv) +#include +#endif +#endif +#endif +#endif + +#ifdef __F16C__ + +#ifdef _MSC_VER +#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x))) +#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0) +#else +#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x) +#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0) +#endif + +#elif defined(__POWER9_VECTOR__) + +#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) +#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) +/* the inline asm below is about 12% faster than the lookup method */ +#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x) +#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) + +static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { + register float f; + register double d; + __asm__( + "mtfprd %0,%2\n" + "xscvhpdp %0,%0\n" + "frsp %1,%0\n" : + /* temp */ "=d"(d), + /* out */ "=f"(f): + /* in */ "r"(h)); + return f; +} + +static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { + register double d; + register ggml_fp16_t r; + __asm__( /* xscvdphp can work on double or single precision */ + "xscvdphp %0,%2\n" + "mffprd %1,%0\n" : + /* temp */ "=d"(d), + /* out */ "=r"(r): + /* in */ "f"(f)); + return r; +} + +#else + +// FP16 <-> FP32 +// ref: https://github.com/Maratyszcza/FP16 + +static inline float fp32_from_bits(uint32_t w) { + union { + uint32_t as_bits; + float as_value; + } fp32; + fp32.as_bits = w; + return fp32.as_value; +} + +static inline uint32_t fp32_to_bits(float f) { + union { + float as_value; + uint32_t as_bits; + } fp32; + fp32.as_value = f; + return fp32.as_bits; +} + +static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { + const uint32_t w = (uint32_t) h << 16; + const uint32_t sign = w & UINT32_C(0x80000000); + const uint32_t two_w = w + w; + + const uint32_t exp_offset = UINT32_C(0xE0) << 23; +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) + const float exp_scale = 0x1.0p-112f; +#else + const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); +#endif + const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; + + const uint32_t magic_mask = UINT32_C(126) << 23; + const float magic_bias = 0.5f; + const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; + + const uint32_t denormalized_cutoff = UINT32_C(1) << 27; + const uint32_t result = sign | + (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); + return fp32_from_bits(result); +} + +static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) + const float scale_to_inf = 0x1.0p+112f; + const float scale_to_zero = 0x1.0p-110f; +#else + const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); + const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); +#endif + float base = (fabsf(f) * scale_to_inf) * scale_to_zero; + + const uint32_t w = fp32_to_bits(f); + const uint32_t shl1_w = w + w; + const uint32_t sign = w & UINT32_C(0x80000000); + uint32_t bias = shl1_w & UINT32_C(0xFF000000); + if (bias < UINT32_C(0x71000000)) { + bias = UINT32_C(0x71000000); + } + + base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; + const uint32_t bits = fp32_to_bits(base); + const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); + const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); + const uint32_t nonsign = exp_bits + mantissa_bits; + return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); +} + +#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) +#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) + +#endif // __F16C__ + +#endif // __ARM_NEON + +// +// global data +// + +// precomputed gelu table for f16 (128 KB) +static ggml_fp16_t table_gelu_f16[1 << 16]; + +// precomputed silu table for f16 (128 KB) +static ggml_fp16_t table_silu_f16[1 << 16]; + +// precomputed exp table for f16 (128 KB) +static ggml_fp16_t table_exp_f16[1 << 16]; + +// precomputed f32 table for f16 (256 KB) +static float table_f32_f16[1 << 16]; + +#if defined(__ARM_NEON) || defined(__wasm_simd128__) +#define B1(c,s,n) 0x ## n ## c , 0x ## n ## s +#define B2(c,s,n) B1(c,s,n ## c), B1(c,s,n ## s) +#define B3(c,s,n) B2(c,s,n ## c), B2(c,s,n ## s) +#define B4(c,s,n) B3(c,s,n ## c), B3(c,s,n ## s) +#define B5(c,s,n) B4(c,s,n ## c), B4(c,s,n ## s) +#define B6(c,s,n) B5(c,s,n ## c), B5(c,s,n ## s) +#define B7(c,s,n) B6(c,s,n ## c), B6(c,s,n ## s) +#define B8(c,s ) B7(c,s, c), B7(c,s, s) + +// precomputed tables for expanding 8bits to 8 bytes: +static const uint64_t table_b2b_0[1 << 8] = { B8(00, 10) }; // ( b) << 4 +static const uint64_t table_b2b_1[1 << 8] = { B8(10, 00) }; // (!b) << 4 +#endif + +// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32, +// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON. +// This is also true for POWER9. +#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16) + +inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) { + uint16_t s; + memcpy(&s, &f, sizeof(uint16_t)); + return table_f32_f16[s]; +} + +#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x) +#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) + +#endif + +// note: do not use these inside ggml.c +// these are meant to be used via the ggml.h API +float ggml_fp16_to_fp32(ggml_fp16_t x) { + return (float) GGML_FP16_TO_FP32(x); +} + +ggml_fp16_t ggml_fp32_to_fp16(float x) { + return GGML_FP32_TO_FP16(x); +} + +void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n) { + for (size_t i = 0; i < n; i++) { + y[i] = GGML_FP16_TO_FP32(x[i]); + } +} + +void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n) { + size_t i = 0; +#if defined(__F16C__) + for (; i + 7 < n; i += 8) { + __m256 x_vec = _mm256_loadu_ps(x + i); + __m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT); + _mm_storeu_si128((__m128i *)(y + i), y_vec); + } + for(; i + 3 < n; i += 4) { + __m128 x_vec = _mm_loadu_ps(x + i); + __m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT); + _mm_storel_epi64((__m128i *)(y + i), y_vec); + } +#endif + for (; i < n; i++) { + y[i] = GGML_FP32_TO_FP16(x[i]); + } +} + + +// +// timing +// + +#if defined(_MSC_VER) || defined(__MINGW32__) +static int64_t timer_freq, timer_start; +void ggml_time_init(void) { + LARGE_INTEGER t; + QueryPerformanceFrequency(&t); + timer_freq = t.QuadPart; + + // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq + // and the uptime is high enough. + // We subtract the program start time to reduce the likelihood of that happening. + QueryPerformanceCounter(&t); + timer_start = t.QuadPart; +} +int64_t ggml_time_ms(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return ((t.QuadPart-timer_start) * 1000) / timer_freq; +} +int64_t ggml_time_us(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return ((t.QuadPart-timer_start) * 1000000) / timer_freq; +} +#else +void ggml_time_init(void) {} +int64_t ggml_time_ms(void) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000; +} + +int64_t ggml_time_us(void) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000; +} +#endif + +int64_t ggml_cycles(void) { + return clock(); +} + +int64_t ggml_cycles_per_ms(void) { + return CLOCKS_PER_SEC/1000; +} + +#ifdef GGML_PERF +#define ggml_perf_time_ms() ggml_time_ms() +#define ggml_perf_time_us() ggml_time_us() +#define ggml_perf_cycles() ggml_cycles() +#define ggml_perf_cycles_per_ms() ggml_cycles_per_ms() +#else +#define ggml_perf_time_ms() 0 +#define ggml_perf_time_us() 0 +#define ggml_perf_cycles() 0 +#define ggml_perf_cycles_per_ms() 0 +#endif + +// +// cache line +// + +#if defined(__cpp_lib_hardware_interference_size) +#define CACHE_LINE_SIZE hardware_destructive_interference_size +#else +#if defined(__POWER9_VECTOR__) +#define CACHE_LINE_SIZE 128 +#else +#define CACHE_LINE_SIZE 64 +#endif +#endif + +static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float); + +// +// quantization +// + +#define MM256_SET_M128I(a, b) _mm256_insertf128_si256(_mm256_castsi128_si256(b), (a), 1) + +#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) +// multiply int8_t, add results pairwise twice +static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) { + // Get absolute values of x vectors + const __m128i ax = _mm_sign_epi8(x, x); + // Sign the values of the y vectors + const __m128i sy = _mm_sign_epi8(y, x); + // Perform multiplication and create 16-bit values + const __m128i dot = _mm_maddubs_epi16(ax, sy); + const __m128i ones = _mm_set1_epi16(1); + return _mm_madd_epi16(ones, dot); +} + +#if __AVX__ || __AVX2__ || __AVX512F__ +// horizontally add 8 floats +static inline float hsum_float_8(const __m256 x) { + __m128 res = _mm256_extractf128_ps(x, 1); + res = _mm_add_ps(res, _mm256_castps256_ps128(x)); + res = _mm_add_ps(res, _mm_movehl_ps(res, res)); + res = _mm_add_ss(res, _mm_movehdup_ps(res)); + return _mm_cvtss_f32(res); +} + +// horizontally add 8 int32_t +static inline int hsum_i32_8(const __m256i a) { + const __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1)); + const __m128i hi64 = _mm_unpackhi_epi64(sum128, sum128); + const __m128i sum64 = _mm_add_epi32(hi64, sum128); + const __m128i hi32 = _mm_shuffle_epi32(sum64, _MM_SHUFFLE(2, 3, 0, 1)); + return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32)); +} + +// horizontally add 4 int32_t +static inline int hsum_i32_4(const __m128i a) { + const __m128i hi64 = _mm_unpackhi_epi64(a, a); + const __m128i sum64 = _mm_add_epi32(hi64, a); + const __m128i hi32 = _mm_shuffle_epi32(sum64, _MM_SHUFFLE(2, 3, 0, 1)); + return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32)); +} + +#if defined(__AVX2__) || defined(__AVX512F__) +// spread 32 bits to 32 bytes { 0x00, 0xFF } +static inline __m256i bytes_from_bits_32(const uint8_t * x) { + uint32_t x32; + memcpy(&x32, x, sizeof(uint32_t)); + const __m256i shuf_mask = _mm256_set_epi64x( + 0x0303030303030303, 0x0202020202020202, + 0x0101010101010101, 0x0000000000000000); + __m256i bytes = _mm256_shuffle_epi8(_mm256_set1_epi32(x32), shuf_mask); + const __m256i bit_mask = _mm256_set1_epi64x(0x7fbfdfeff7fbfdfe); + bytes = _mm256_or_si256(bytes, bit_mask); + return _mm256_cmpeq_epi8(bytes, _mm256_set1_epi64x(-1)); +} + +// Unpack 32 4-bit fields into 32 bytes +// The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval +static inline __m256i bytes_from_nibbles_32(const uint8_t * rsi) +{ + const __m128i tmp = _mm_loadu_si128((const __m128i *)rsi); + const __m256i bytes = MM256_SET_M128I(_mm_srli_epi16(tmp, 4), tmp); + const __m256i lowMask = _mm256_set1_epi8( 0xF ); + return _mm256_and_si256(lowMask, bytes); +} + +// add int16_t pairwise and return as float vector +static inline __m256 sum_i16_pairs_float(const __m256i x) { + const __m256i ones = _mm256_set1_epi16(1); + const __m256i summed_pairs = _mm256_madd_epi16(ones, x); + return _mm256_cvtepi32_ps(summed_pairs); +} + +static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) { +#if __AVXVNNI__ + const __m256i zero = _mm256_setzero_si256(); + const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy); + return _mm256_cvtepi32_ps(summed_pairs); +#else + // Perform multiplication and create 16-bit values + const __m256i dot = _mm256_maddubs_epi16(ax, sy); + return sum_i16_pairs_float(dot); +#endif +} + +// multiply int8_t, add results pairwise twice and return as float vector +static inline __m256 mul_sum_i8_pairs_float(const __m256i x, const __m256i y) { +#if __AVXVNNIINT8__ + const __m256i zero = _mm256_setzero_si256(); + const __m256i summed_pairs = _mm256_dpbssd_epi32(zero, x, y); + return _mm256_cvtepi32_ps(summed_pairs); +#else + // Get absolute values of x vectors + const __m256i ax = _mm256_sign_epi8(x, x); + // Sign the values of the y vectors + const __m256i sy = _mm256_sign_epi8(y, x); + return mul_sum_us8_pairs_float(ax, sy); +#endif +} + +static inline __m128i packNibbles( __m256i bytes ) +{ + // Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh +#if __AVX512F__ + const __m256i bytes_srli_4 = _mm256_srli_epi16(bytes, 4); // 0000_0000_abcd_0000 + bytes = _mm256_or_si256(bytes, bytes_srli_4); // 0000_abcd_abcd_efgh + return _mm256_cvtepi16_epi8(bytes); // abcd_efgh +#else + const __m256i lowByte = _mm256_set1_epi16( 0xFF ); + __m256i high = _mm256_andnot_si256( lowByte, bytes ); + __m256i low = _mm256_and_si256( lowByte, bytes ); + high = _mm256_srli_epi16( high, 4 ); + bytes = _mm256_or_si256( low, high ); + + // Compress uint16_t lanes into bytes + __m128i r0 = _mm256_castsi256_si128( bytes ); + __m128i r1 = _mm256_extracti128_si256( bytes, 1 ); + return _mm_packus_epi16( r0, r1 ); +#endif +} +#elif defined(__AVX__) +// spread 32 bits to 32 bytes { 0x00, 0xFF } +static inline __m256i bytes_from_bits_32(const uint8_t * x) { + uint32_t x32; + memcpy(&x32, x, sizeof(uint32_t)); + const __m128i shuf_maskl = _mm_set_epi64x(0x0101010101010101, 0x0000000000000000); + const __m128i shuf_maskh = _mm_set_epi64x(0x0303030303030303, 0x0202020202020202); + __m128i bytesl = _mm_shuffle_epi8(_mm_set1_epi32(x32), shuf_maskl); + __m128i bytesh = _mm_shuffle_epi8(_mm_set1_epi32(x32), shuf_maskh); + const __m128i bit_mask = _mm_set1_epi64x(0x7fbfdfeff7fbfdfe); + bytesl = _mm_or_si128(bytesl, bit_mask); + bytesh = _mm_or_si128(bytesh, bit_mask); + bytesl = _mm_cmpeq_epi8(bytesl, _mm_set1_epi64x(-1)); + bytesh = _mm_cmpeq_epi8(bytesh, _mm_set1_epi64x(-1)); + return MM256_SET_M128I(bytesh, bytesl); +} + +// Unpack 32 4-bit fields into 32 bytes +// The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval +static inline __m256i bytes_from_nibbles_32(const uint8_t * rsi) +{ + // Load 16 bytes from memory + __m128i tmpl = _mm_loadu_si128((const __m128i *)rsi); + __m128i tmph = _mm_srli_epi16(tmpl, 4); + const __m128i lowMask = _mm_set1_epi8(0xF); + tmpl = _mm_and_si128(lowMask, tmpl); + tmph = _mm_and_si128(lowMask, tmph); + return MM256_SET_M128I(tmph, tmpl); +} + +// add int16_t pairwise and return as float vector +static inline __m256 sum_i16_pairs_float(const __m128i xh, const __m128i xl) { + const __m128i ones = _mm_set1_epi16(1); + const __m128i summed_pairsl = _mm_madd_epi16(ones, xl); + const __m128i summed_pairsh = _mm_madd_epi16(ones, xh); + const __m256i summed_pairs = MM256_SET_M128I(summed_pairsh, summed_pairsl); + return _mm256_cvtepi32_ps(summed_pairs); +} + +static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) { + const __m128i axl = _mm256_castsi256_si128(ax); + const __m128i axh = _mm256_extractf128_si256(ax, 1); + const __m128i syl = _mm256_castsi256_si128(sy); + const __m128i syh = _mm256_extractf128_si256(sy, 1); + // Perform multiplication and create 16-bit values + const __m128i dotl = _mm_maddubs_epi16(axl, syl); + const __m128i doth = _mm_maddubs_epi16(axh, syh); + return sum_i16_pairs_float(doth, dotl); +} + +// multiply int8_t, add results pairwise twice and return as float vector +static inline __m256 mul_sum_i8_pairs_float(const __m256i x, const __m256i y) { + const __m128i xl = _mm256_castsi256_si128(x); + const __m128i xh = _mm256_extractf128_si256(x, 1); + const __m128i yl = _mm256_castsi256_si128(y); + const __m128i yh = _mm256_extractf128_si256(y, 1); + // Get absolute values of x vectors + const __m128i axl = _mm_sign_epi8(xl, xl); + const __m128i axh = _mm_sign_epi8(xh, xh); + // Sign the values of the y vectors + const __m128i syl = _mm_sign_epi8(yl, xl); + const __m128i syh = _mm_sign_epi8(yh, xh); + // Perform multiplication and create 16-bit values + const __m128i dotl = _mm_maddubs_epi16(axl, syl); + const __m128i doth = _mm_maddubs_epi16(axh, syh); + return sum_i16_pairs_float(doth, dotl); +} + +static inline __m128i packNibbles( __m128i bytes1, __m128i bytes2 ) +{ + // Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh + const __m128i lowByte = _mm_set1_epi16( 0xFF ); + __m128i high = _mm_andnot_si128( lowByte, bytes1 ); + __m128i low = _mm_and_si128( lowByte, bytes1 ); + high = _mm_srli_epi16( high, 4 ); + bytes1 = _mm_or_si128( low, high ); + high = _mm_andnot_si128( lowByte, bytes2 ); + low = _mm_and_si128( lowByte, bytes2 ); + high = _mm_srli_epi16( high, 4 ); + bytes2 = _mm_or_si128( low, high ); + + return _mm_packus_epi16( bytes1, bytes2); +} +#endif +#elif defined(__SSSE3__) +// horizontally add 4x4 floats +static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128 c, const __m128 d) { + __m128 res_0 =_mm_hadd_ps(a, b); + __m128 res_1 =_mm_hadd_ps(c, d); + __m128 res =_mm_hadd_ps(res_0, res_1); + res =_mm_hadd_ps(res, res); + res =_mm_hadd_ps(res, res); + + return _mm_cvtss_f32(res); +} +#endif // __AVX__ || __AVX2__ || __AVX512F__ +#endif // defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) + +#if defined(__ARM_NEON) + +#if !defined(__aarch64__) + +inline static uint16_t vaddvq_u8(uint8x16_t v) { + return + (uint16_t)vgetq_lane_u8(v, 0) + (uint16_t)vgetq_lane_u8(v, 1) + + (uint16_t)vgetq_lane_u8(v, 2) + (uint16_t)vgetq_lane_u8(v, 3) + + (uint16_t)vgetq_lane_u8(v, 4) + (uint16_t)vgetq_lane_u8(v, 5) + + (uint16_t)vgetq_lane_u8(v, 6) + (uint16_t)vgetq_lane_u8(v, 7) + + (uint16_t)vgetq_lane_u8(v, 8) + (uint16_t)vgetq_lane_u8(v, 9) + + (uint16_t)vgetq_lane_u8(v, 10) + (uint16_t)vgetq_lane_u8(v, 11) + + (uint16_t)vgetq_lane_u8(v, 12) + (uint16_t)vgetq_lane_u8(v, 13) + + (uint16_t)vgetq_lane_u8(v, 14) + (uint16_t)vgetq_lane_u8(v, 15); +} + +inline static int16_t vaddvq_s8(int8x16_t v) { + return + (int16_t)vgetq_lane_s8(v, 0) + (int16_t)vgetq_lane_s8(v, 1) + + (int16_t)vgetq_lane_s8(v, 2) + (int16_t)vgetq_lane_s8(v, 3) + + (int16_t)vgetq_lane_s8(v, 4) + (int16_t)vgetq_lane_s8(v, 5) + + (int16_t)vgetq_lane_s8(v, 6) + (int16_t)vgetq_lane_s8(v, 7) + + (int16_t)vgetq_lane_s8(v, 8) + (int16_t)vgetq_lane_s8(v, 9) + + (int16_t)vgetq_lane_s8(v, 10) + (int16_t)vgetq_lane_s8(v, 11) + + (int16_t)vgetq_lane_s8(v, 12) + (int16_t)vgetq_lane_s8(v, 13) + + (int16_t)vgetq_lane_s8(v, 14) + (int16_t)vgetq_lane_s8(v, 15); +} + +inline static int32_t vaddvq_s16(int16x8_t v) { + return + (int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) + + (int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) + + (int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) + + (int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7); +} + +inline static uint32_t vaddvq_u16(uint16x8_t v) { + return + (uint32_t)vgetq_lane_u16(v, 0) + (uint32_t)vgetq_lane_u16(v, 1) + + (uint32_t)vgetq_lane_u16(v, 2) + (uint32_t)vgetq_lane_u16(v, 3) + + (uint32_t)vgetq_lane_u16(v, 4) + (uint32_t)vgetq_lane_u16(v, 5) + + (uint32_t)vgetq_lane_u16(v, 6) + (uint32_t)vgetq_lane_u16(v, 7); +} + +inline static int32_t vaddvq_s32(int32x4_t v) { + return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3); +} + +inline static float vaddvq_f32(float32x4_t v) { + return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3); +} + +inline static float vminvq_f32(float32x4_t v) { + return + MIN(MIN(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)), + MIN(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3))); +} + +inline static float vmaxvq_f32(float32x4_t v) { + return + MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)), + MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3))); +} + +inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) { + int32x4_t res; + + res[0] = roundf(vgetq_lane_f32(v, 0)); + res[1] = roundf(vgetq_lane_f32(v, 1)); + res[2] = roundf(vgetq_lane_f32(v, 2)); + res[3] = roundf(vgetq_lane_f32(v, 3)); + + return res; +} + +#endif +#endif + +#define QK4_0 32 +typedef struct { + ggml_fp16_t d; // delta + uint8_t qs[QK4_0 / 2]; // nibbles / quants +} block_q4_0; +static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding"); + +#define QK4_1 32 +typedef struct { + ggml_fp16_t d; // delta + ggml_fp16_t m; // min + uint8_t qs[QK4_1 / 2]; // nibbles / quants +} block_q4_1; +static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_fp16_t) + QK4_1 / 2, "wrong q4_1 block size/padding"); + +#define QK5_0 32 +typedef struct { + ggml_fp16_t d; // delta + uint8_t qh[4]; // 5-th bit of quants + uint8_t qs[QK5_0 / 2]; // nibbles / quants +} block_q5_0; +static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding"); + +#define QK5_1 32 +typedef struct { + ggml_fp16_t d; // delta + ggml_fp16_t m; // min + uint8_t qh[4]; // 5-th bit of quants + uint8_t qs[QK5_1 / 2]; // nibbles / quants +} block_q5_1; +static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding"); + +#define QK8_0 32 +typedef struct { + ggml_fp16_t d; // delta + int8_t qs[QK8_0]; // quants +} block_q8_0; +static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding"); + +#define QK8_1 32 +typedef struct { + float d; // delta + float s; // d * sum(qs[i]) + int8_t qs[QK8_1]; // quants +} block_q8_1; +static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding"); + +// reference implementation for deterministic creation of model files +static void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) { + static const int qk = QK4_0; + + assert(k % qk == 0); + + const int nb = k / qk; + + for (int i = 0; i < nb; i++) { + float amax = 0.0f; // absolute max + float max = 0.0f; + + for (int j = 0; j < qk; j++) { + const float v = x[i*qk + j]; + if (amax < fabsf(v)) { + amax = fabsf(v); + max = v; + } + } + + const float d = max / -8; + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = GGML_FP32_TO_FP16(d); + + for (int j = 0; j < qk/2; ++j) { + const float x0 = x[i*qk + 0 + j]*id; + const float x1 = x[i*qk + qk/2 + j]*id; + + const uint8_t xi0 = MIN(15, (int8_t)(x0 + 8.5f)); + const uint8_t xi1 = MIN(15, (int8_t)(x1 + 8.5f)); + + y[i].qs[j] = xi0; + y[i].qs[j] |= xi1 << 4; + } + } +} + +static void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { + quantize_row_q4_0_reference(x, y, k); +} + +static void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) { + const int qk = QK4_1; + + assert(k % qk == 0); + + const int nb = k / qk; + + for (int i = 0; i < nb; i++) { + float min = FLT_MAX; + float max = -FLT_MAX; + + for (int j = 0; j < qk; j++) { + const float v = x[i*qk + j]; + + if (v < min) min = v; + if (v > max) max = v; + } + + const float d = (max - min) / ((1 << 4) - 1); + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = GGML_FP32_TO_FP16(d); + y[i].m = GGML_FP32_TO_FP16(min); + + for (int j = 0; j < qk/2; ++j) { + const float x0 = (x[i*qk + 0 + j] - min)*id; + const float x1 = (x[i*qk + qk/2 + j] - min)*id; + + const uint8_t xi0 = MIN(15, (int8_t)(x0 + 0.5f)); + const uint8_t xi1 = MIN(15, (int8_t)(x1 + 0.5f)); + + y[i].qs[j] = xi0; + y[i].qs[j] |= xi1 << 4; + } + } +} + +static void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) { + quantize_row_q4_1_reference(x, y, k); +} + +static void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) { + static const int qk = QK5_0; + + assert(k % qk == 0); + + const int nb = k / qk; + + for (int i = 0; i < nb; i++) { + float amax = 0.0f; // absolute max + float max = 0.0f; + + for (int j = 0; j < qk; j++) { + const float v = x[i*qk + j]; + if (amax < fabsf(v)) { + amax = fabsf(v); + max = v; + } + } + + const float d = max / -16; + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = GGML_FP32_TO_FP16(d); + + uint32_t qh = 0; + + for (int j = 0; j < qk/2; ++j) { + const float x0 = x[i*qk + 0 + j]*id; + const float x1 = x[i*qk + qk/2 + j]*id; + + const uint8_t xi0 = MIN(31, (int8_t)(x0 + 16.5f)); + const uint8_t xi1 = MIN(31, (int8_t)(x1 + 16.5f)); + + y[i].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4); + + // get the 5-th bit and store it in qh at the right position + qh |= ((xi0 & 0x10) >> 4) << (j + 0); + qh |= ((xi1 & 0x10) >> 4) << (j + qk/2); + } + + memcpy(&y[i].qh, &qh, sizeof(qh)); + } +} + +static void quantize_row_q5_0(const float * restrict x, void * restrict y, int k) { + quantize_row_q5_0_reference(x, y, k); +} + +static void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) { + const int qk = QK5_1; + + assert(k % qk == 0); + + const int nb = k / qk; + + for (int i = 0; i < nb; i++) { + float min = FLT_MAX; + float max = -FLT_MAX; + + for (int j = 0; j < qk; j++) { + const float v = x[i*qk + j]; + + if (v < min) min = v; + if (v > max) max = v; + } + + const float d = (max - min) / ((1 << 5) - 1); + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = GGML_FP32_TO_FP16(d); + y[i].m = GGML_FP32_TO_FP16(min); + + uint32_t qh = 0; + + for (int j = 0; j < qk/2; ++j) { + const float x0 = (x[i*qk + 0 + j] - min)*id; + const float x1 = (x[i*qk + qk/2 + j] - min)*id; + + const uint8_t xi0 = (uint8_t)(x0 + 0.5f); + const uint8_t xi1 = (uint8_t)(x1 + 0.5f); + + y[i].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4); + + // get the 5-th bit and store it in qh at the right position + qh |= ((xi0 & 0x10) >> 4) << (j + 0); + qh |= ((xi1 & 0x10) >> 4) << (j + qk/2); + } + + memcpy(&y[i].qh, &qh, sizeof(y[i].qh)); + } +} + +static void quantize_row_q5_1(const float * restrict x, void * restrict y, int k) { + quantize_row_q5_1_reference(x, y, k); +} + +// reference implementation for deterministic creation of model files +static void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) { + assert(k % QK8_0 == 0); + const int nb = k / QK8_0; + + for (int i = 0; i < nb; i++) { + float amax = 0.0f; // absolute max + + for (int j = 0; j < QK8_0; j++) { + const float v = x[i*QK8_0 + j]; + amax = MAX(amax, fabsf(v)); + } + + const float d = amax / ((1 << 7) - 1); + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = GGML_FP32_TO_FP16(d); + + for (int j = 0; j < QK8_0; ++j) { + const float x0 = x[i*QK8_0 + j]*id; + + y[i].qs[j] = roundf(x0); + } + } +} + +static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) { + assert(QK8_0 == 32); + assert(k % QK8_0 == 0); + const int nb = k / QK8_0; + + block_q8_0 * restrict y = vy; + +#if defined(__ARM_NEON) + for (int i = 0; i < nb; i++) { + float32x4_t srcv [8]; + float32x4_t asrcv[8]; + float32x4_t amaxv[8]; + + for (int j = 0; j < 8; j++) srcv[j] = vld1q_f32(x + i*32 + 4*j); + for (int j = 0; j < 8; j++) asrcv[j] = vabsq_f32(srcv[j]); + + for (int j = 0; j < 4; j++) amaxv[2*j] = vmaxq_f32(asrcv[2*j], asrcv[2*j+1]); + for (int j = 0; j < 2; j++) amaxv[4*j] = vmaxq_f32(amaxv[4*j], amaxv[4*j+2]); + for (int j = 0; j < 1; j++) amaxv[8*j] = vmaxq_f32(amaxv[8*j], amaxv[8*j+4]); + + const float amax = vmaxvq_f32(amaxv[0]); + + const float d = amax / ((1 << 7) - 1); + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = GGML_FP32_TO_FP16(d); + + for (int j = 0; j < 8; j++) { + const float32x4_t v = vmulq_n_f32(srcv[j], id); + const int32x4_t vi = vcvtnq_s32_f32(v); + + y[i].qs[4*j + 0] = vgetq_lane_s32(vi, 0); + y[i].qs[4*j + 1] = vgetq_lane_s32(vi, 1); + y[i].qs[4*j + 2] = vgetq_lane_s32(vi, 2); + y[i].qs[4*j + 3] = vgetq_lane_s32(vi, 3); + } + } +#elif defined(__wasm_simd128__) + for (int i = 0; i < nb; i++) { + v128_t srcv [8]; + v128_t asrcv[8]; + v128_t amaxv[8]; + + for (int j = 0; j < 8; j++) srcv[j] = wasm_v128_load(x + i*32 + 4*j); + for (int j = 0; j < 8; j++) asrcv[j] = wasm_f32x4_abs(srcv[j]); + + for (int j = 0; j < 4; j++) amaxv[2*j] = wasm_f32x4_max(asrcv[2*j], asrcv[2*j+1]); + for (int j = 0; j < 2; j++) amaxv[4*j] = wasm_f32x4_max(amaxv[4*j], amaxv[4*j+2]); + for (int j = 0; j < 1; j++) amaxv[8*j] = wasm_f32x4_max(amaxv[8*j], amaxv[8*j+4]); + + const float amax = MAX(MAX(wasm_f32x4_extract_lane(amaxv[0], 0), + wasm_f32x4_extract_lane(amaxv[0], 1)), + MAX(wasm_f32x4_extract_lane(amaxv[0], 2), + wasm_f32x4_extract_lane(amaxv[0], 3))); + + const float d = amax / ((1 << 7) - 1); + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = GGML_FP32_TO_FP16(d); + + for (int j = 0; j < 8; j++) { + const v128_t v = wasm_f32x4_mul(srcv[j], wasm_f32x4_splat(id)); + const v128_t vi = wasm_i32x4_trunc_sat_f32x4(v); + + y[i].qs[4*j + 0] = wasm_i32x4_extract_lane(vi, 0); + y[i].qs[4*j + 1] = wasm_i32x4_extract_lane(vi, 1); + y[i].qs[4*j + 2] = wasm_i32x4_extract_lane(vi, 2); + y[i].qs[4*j + 3] = wasm_i32x4_extract_lane(vi, 3); + } + } +#elif defined(__AVX2__) || defined(__AVX__) + for (int i = 0; i < nb; i++) { + // Load elements into 4 AVX vectors + __m256 v0 = _mm256_loadu_ps( x ); + __m256 v1 = _mm256_loadu_ps( x + 8 ); + __m256 v2 = _mm256_loadu_ps( x + 16 ); + __m256 v3 = _mm256_loadu_ps( x + 24 ); + x += 32; + + // Compute max(abs(e)) for the block + const __m256 signBit = _mm256_set1_ps( -0.0f ); + __m256 maxAbs = _mm256_andnot_ps( signBit, v0 ); + maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v1 ) ); + maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v2 ) ); + maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v3 ) ); + + __m128 max4 = _mm_max_ps( _mm256_extractf128_ps( maxAbs, 1 ), _mm256_castps256_ps128( maxAbs ) ); + max4 = _mm_max_ps( max4, _mm_movehl_ps( max4, max4 ) ); + max4 = _mm_max_ss( max4, _mm_movehdup_ps( max4 ) ); + const float maxScalar = _mm_cvtss_f32( max4 ); + + // Quantize these floats + const float d = maxScalar / 127.f; + y[i].d = GGML_FP32_TO_FP16(d); + const float id = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f; + const __m256 mul = _mm256_set1_ps( id ); + + // Apply the multiplier + v0 = _mm256_mul_ps( v0, mul ); + v1 = _mm256_mul_ps( v1, mul ); + v2 = _mm256_mul_ps( v2, mul ); + v3 = _mm256_mul_ps( v3, mul ); + + // Round to nearest integer + v0 = _mm256_round_ps( v0, _MM_ROUND_NEAREST ); + v1 = _mm256_round_ps( v1, _MM_ROUND_NEAREST ); + v2 = _mm256_round_ps( v2, _MM_ROUND_NEAREST ); + v3 = _mm256_round_ps( v3, _MM_ROUND_NEAREST ); + + // Convert floats to integers + __m256i i0 = _mm256_cvtps_epi32( v0 ); + __m256i i1 = _mm256_cvtps_epi32( v1 ); + __m256i i2 = _mm256_cvtps_epi32( v2 ); + __m256i i3 = _mm256_cvtps_epi32( v3 ); + +#if defined(__AVX2__) + // Convert int32 to int16 + i0 = _mm256_packs_epi32( i0, i1 ); // 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15 + i2 = _mm256_packs_epi32( i2, i3 ); // 16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31 + // Convert int16 to int8 + i0 = _mm256_packs_epi16( i0, i2 ); // 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 + + // We got our precious signed bytes, but the order is now wrong + // These AVX2 pack instructions process 16-byte pieces independently + // The following instruction is fixing the order + const __m256i perm = _mm256_setr_epi32( 0, 4, 1, 5, 2, 6, 3, 7 ); + i0 = _mm256_permutevar8x32_epi32( i0, perm ); + + _mm256_storeu_si256((__m256i *)y[i].qs, i0); +#else + // Since we don't have in AVX some necessary functions, + // we split the registers in half and call AVX2 analogs from SSE + __m128i ni0 = _mm256_castsi256_si128( i0 ); + __m128i ni1 = _mm256_extractf128_si256( i0, 1); + __m128i ni2 = _mm256_castsi256_si128( i1 ); + __m128i ni3 = _mm256_extractf128_si256( i1, 1); + __m128i ni4 = _mm256_castsi256_si128( i2 ); + __m128i ni5 = _mm256_extractf128_si256( i2, 1); + __m128i ni6 = _mm256_castsi256_si128( i3 ); + __m128i ni7 = _mm256_extractf128_si256( i3, 1); + + // Convert int32 to int16 + ni0 = _mm_packs_epi32( ni0, ni1 ); + ni2 = _mm_packs_epi32( ni2, ni3 ); + ni4 = _mm_packs_epi32( ni4, ni5 ); + ni6 = _mm_packs_epi32( ni6, ni7 ); + // Convert int16 to int8 + ni0 = _mm_packs_epi16( ni0, ni2 ); + ni4 = _mm_packs_epi16( ni4, ni6 ); + + _mm_storeu_si128((__m128i *)(y[i].qs + 0), ni0); + _mm_storeu_si128((__m128i *)(y[i].qs + 16), ni4); +#endif + } +#else + // scalar + quantize_row_q8_0_reference(x, y, k); +#endif +} + +// reference implementation for deterministic creation of model files +static void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) { + assert(QK8_1 == 32); + assert(k % QK8_1 == 0); + const int nb = k / QK8_1; + + for (int i = 0; i < nb; i++) { + float amax = 0.0f; // absolute max + + for (int j = 0; j < QK8_1; j++) { + const float v = x[i*QK8_1 + j]; + amax = MAX(amax, fabsf(v)); + } + + const float d = amax / ((1 << 7) - 1); + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = d; + + int sum = 0; + + for (int j = 0; j < QK8_1/2; ++j) { + const float v0 = x[i*QK8_1 + j]*id; + const float v1 = x[i*QK8_1 + QK8_1/2 + j]*id; + + y[i].qs[ j] = roundf(v0); + y[i].qs[QK8_1/2 + j] = roundf(v1); + + sum += y[i].qs[ j]; + sum += y[i].qs[QK8_1/2 + j]; + } + + y[i].s = sum*d; + } +} + +static void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { + assert(k % QK8_1 == 0); + const int nb = k / QK8_1; + + block_q8_1 * restrict y = vy; + +#if defined(__ARM_NEON) + for (int i = 0; i < nb; i++) { + float32x4_t srcv [8]; + float32x4_t asrcv[8]; + float32x4_t amaxv[8]; + + for (int j = 0; j < 8; j++) srcv[j] = vld1q_f32(x + i*32 + 4*j); + for (int j = 0; j < 8; j++) asrcv[j] = vabsq_f32(srcv[j]); + + for (int j = 0; j < 4; j++) amaxv[2*j] = vmaxq_f32(asrcv[2*j], asrcv[2*j+1]); + for (int j = 0; j < 2; j++) amaxv[4*j] = vmaxq_f32(amaxv[4*j], amaxv[4*j+2]); + for (int j = 0; j < 1; j++) amaxv[8*j] = vmaxq_f32(amaxv[8*j], amaxv[8*j+4]); + + const float amax = vmaxvq_f32(amaxv[0]); + + const float d = amax / ((1 << 7) - 1); + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = d; + + int32x4_t accv = vdupq_n_s32(0); + + for (int j = 0; j < 8; j++) { + const float32x4_t v = vmulq_n_f32(srcv[j], id); + const int32x4_t vi = vcvtnq_s32_f32(v); + + y[i].qs[4*j + 0] = vgetq_lane_s32(vi, 0); + y[i].qs[4*j + 1] = vgetq_lane_s32(vi, 1); + y[i].qs[4*j + 2] = vgetq_lane_s32(vi, 2); + y[i].qs[4*j + 3] = vgetq_lane_s32(vi, 3); + + accv = vaddq_s32(accv, vi); + } + + y[i].s = d * vaddvq_s32(accv); + } +#elif defined(__wasm_simd128__) + for (int i = 0; i < nb; i++) { + v128_t srcv [8]; + v128_t asrcv[8]; + v128_t amaxv[8]; + + for (int j = 0; j < 8; j++) srcv[j] = wasm_v128_load(x + i*32 + 4*j); + for (int j = 0; j < 8; j++) asrcv[j] = wasm_f32x4_abs(srcv[j]); + + for (int j = 0; j < 4; j++) amaxv[2*j] = wasm_f32x4_max(asrcv[2*j], asrcv[2*j+1]); + for (int j = 0; j < 2; j++) amaxv[4*j] = wasm_f32x4_max(amaxv[4*j], amaxv[4*j+2]); + for (int j = 0; j < 1; j++) amaxv[8*j] = wasm_f32x4_max(amaxv[8*j], amaxv[8*j+4]); + + const float amax = MAX(MAX(wasm_f32x4_extract_lane(amaxv[0], 0), + wasm_f32x4_extract_lane(amaxv[0], 1)), + MAX(wasm_f32x4_extract_lane(amaxv[0], 2), + wasm_f32x4_extract_lane(amaxv[0], 3))); + + const float d = amax / ((1 << 7) - 1); + const float id = d ? 1.0f/d : 0.0f; + + y[i].d = d; + + v128_t accv = wasm_i32x4_splat(0); + + for (int j = 0; j < 8; j++) { + const v128_t v = wasm_f32x4_mul(srcv[j], wasm_f32x4_splat(id)); + const v128_t vi = wasm_i32x4_trunc_sat_f32x4(v); + + y[i].qs[4*j + 0] = wasm_i32x4_extract_lane(vi, 0); + y[i].qs[4*j + 1] = wasm_i32x4_extract_lane(vi, 1); + y[i].qs[4*j + 2] = wasm_i32x4_extract_lane(vi, 2); + y[i].qs[4*j + 3] = wasm_i32x4_extract_lane(vi, 3); + + accv = wasm_i32x4_add(accv, vi); + } + + y[i].s = d * (wasm_i32x4_extract_lane(accv, 0) + + wasm_i32x4_extract_lane(accv, 1) + + wasm_i32x4_extract_lane(accv, 2) + + wasm_i32x4_extract_lane(accv, 3)); + } +#elif defined(__AVX2__) || defined(__AVX__) + for (int i = 0; i < nb; i++) { + // Load elements into 4 AVX vectors + __m256 v0 = _mm256_loadu_ps( x ); + __m256 v1 = _mm256_loadu_ps( x + 8 ); + __m256 v2 = _mm256_loadu_ps( x + 16 ); + __m256 v3 = _mm256_loadu_ps( x + 24 ); + x += 32; + + // Compute max(abs(e)) for the block + const __m256 signBit = _mm256_set1_ps( -0.0f ); + __m256 maxAbs = _mm256_andnot_ps( signBit, v0 ); + maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v1 ) ); + maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v2 ) ); + maxAbs = _mm256_max_ps( maxAbs, _mm256_andnot_ps( signBit, v3 ) ); + + __m128 max4 = _mm_max_ps( _mm256_extractf128_ps( maxAbs, 1 ), _mm256_castps256_ps128( maxAbs ) ); + max4 = _mm_max_ps( max4, _mm_movehl_ps( max4, max4 ) ); + max4 = _mm_max_ss( max4, _mm_movehdup_ps( max4 ) ); + const float maxScalar = _mm_cvtss_f32( max4 ); + + // Quantize these floats + const float d = maxScalar / 127.f; + y[i].d = d; + const float id = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f; + const __m256 mul = _mm256_set1_ps( id ); + + // Apply the multiplier + v0 = _mm256_mul_ps( v0, mul ); + v1 = _mm256_mul_ps( v1, mul ); + v2 = _mm256_mul_ps( v2, mul ); + v3 = _mm256_mul_ps( v3, mul ); + + // Round to nearest integer + v0 = _mm256_round_ps( v0, _MM_ROUND_NEAREST ); + v1 = _mm256_round_ps( v1, _MM_ROUND_NEAREST ); + v2 = _mm256_round_ps( v2, _MM_ROUND_NEAREST ); + v3 = _mm256_round_ps( v3, _MM_ROUND_NEAREST ); + + // Convert floats to integers + __m256i i0 = _mm256_cvtps_epi32( v0 ); + __m256i i1 = _mm256_cvtps_epi32( v1 ); + __m256i i2 = _mm256_cvtps_epi32( v2 ); + __m256i i3 = _mm256_cvtps_epi32( v3 ); + +#if defined(__AVX2__) + // Compute the sum of the quants and set y[i].s + y[i].s = d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3))); + + // Convert int32 to int16 + i0 = _mm256_packs_epi32( i0, i1 ); // 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15 + i2 = _mm256_packs_epi32( i2, i3 ); // 16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31 + // Convert int16 to int8 + i0 = _mm256_packs_epi16( i0, i2 ); // 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 + + // We got our precious signed bytes, but the order is now wrong + // These AVX2 pack instructions process 16-byte pieces independently + // The following instruction is fixing the order + const __m256i perm = _mm256_setr_epi32( 0, 4, 1, 5, 2, 6, 3, 7 ); + i0 = _mm256_permutevar8x32_epi32( i0, perm ); + + _mm256_storeu_si256((__m256i *)y[i].qs, i0); +#else + // Since we don't have in AVX some necessary functions, + // we split the registers in half and call AVX2 analogs from SSE + __m128i ni0 = _mm256_castsi256_si128( i0 ); + __m128i ni1 = _mm256_extractf128_si256( i0, 1); + __m128i ni2 = _mm256_castsi256_si128( i1 ); + __m128i ni3 = _mm256_extractf128_si256( i1, 1); + __m128i ni4 = _mm256_castsi256_si128( i2 ); + __m128i ni5 = _mm256_extractf128_si256( i2, 1); + __m128i ni6 = _mm256_castsi256_si128( i3 ); + __m128i ni7 = _mm256_extractf128_si256( i3, 1); + + // Compute the sum of the quants and set y[i].s + const __m128i s0 = _mm_add_epi32(_mm_add_epi32(ni0, ni1), _mm_add_epi32(ni2, ni3)); + const __m128i s1 = _mm_add_epi32(_mm_add_epi32(ni4, ni5), _mm_add_epi32(ni6, ni7)); + y[i].s = d * hsum_i32_4(_mm_add_epi32(s0, s1)); + + // Convert int32 to int16 + ni0 = _mm_packs_epi32( ni0, ni1 ); + ni2 = _mm_packs_epi32( ni2, ni3 ); + ni4 = _mm_packs_epi32( ni4, ni5 ); + ni6 = _mm_packs_epi32( ni6, ni7 ); + // Convert int16 to int8 + ni0 = _mm_packs_epi16( ni0, ni2 ); + ni4 = _mm_packs_epi16( ni4, ni6 ); + + _mm_storeu_si128((__m128i *)(y[i].qs + 0), ni0); + _mm_storeu_si128((__m128i *)(y[i].qs + 16), ni4); +#endif + } +#else + // scalar + quantize_row_q8_1_reference(x, y, k); +#endif +} + +static void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) { + static const int qk = QK4_0; + + assert(k % qk == 0); + + const int nb = k / qk; + + for (int i = 0; i < nb; i++) { + const float d = GGML_FP16_TO_FP32(x[i].d); + + for (int j = 0; j < qk/2; ++j) { + const int x0 = (x[i].qs[j] & 0x0F) - 8; + const int x1 = (x[i].qs[j] >> 4) - 8; + + y[i*qk + j + 0 ] = x0*d; + y[i*qk + j + qk/2] = x1*d; + } + } +} + +static void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) { + static const int qk = QK4_1; + + assert(k % qk == 0); + + const int nb = k / qk; + + for (int i = 0; i < nb; i++) { + const float d = GGML_FP16_TO_FP32(x[i].d); + const float m = GGML_FP16_TO_FP32(x[i].m); + + for (int j = 0; j < qk/2; ++j) { + const int x0 = (x[i].qs[j] & 0x0F); + const int x1 = (x[i].qs[j] >> 4); + + y[i*qk + j + 0 ] = x0*d + m; + y[i*qk + j + qk/2] = x1*d + m; + } + } +} + +static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) { + static const int qk = QK5_0; + + assert(k % qk == 0); + + const int nb = k / qk; + + for (int i = 0; i < nb; i++) { + const float d = GGML_FP16_TO_FP32(x[i].d); + + uint32_t qh; + memcpy(&qh, x[i].qh, sizeof(qh)); + + for (int j = 0; j < qk/2; ++j) { + const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10; + const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10; + + const int32_t x0 = ((x[i].qs[j] & 0x0F) | xh_0) - 16; + const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16; + + y[i*qk + j + 0 ] = x0*d; + y[i*qk + j + qk/2] = x1*d; + } + } +} + +static void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) { + static const int qk = QK5_1; + + assert(k % qk == 0); + + const int nb = k / qk; + + for (int i = 0; i < nb; i++) { + const float d = GGML_FP16_TO_FP32(x[i].d); + const float m = GGML_FP16_TO_FP32(x[i].m); + + uint32_t qh; + memcpy(&qh, x[i].qh, sizeof(qh)); + + for (int j = 0; j < qk/2; ++j) { + const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10; + const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10; + + const int x0 = (x[i].qs[j] & 0x0F) | xh_0; + const int x1 = (x[i].qs[j] >> 4) | xh_1; + + y[i*qk + j + 0 ] = x0*d + m; + y[i*qk + j + qk/2] = x1*d + m; + } + } +} + +static void dequantize_row_q8_0(const void * restrict vx, float * restrict y, int k) { + static const int qk = QK8_0; + + assert(k % qk == 0); + + const int nb = k / qk; + + const block_q8_0 * restrict x = vx; + + for (int i = 0; i < nb; i++) { + const float d = GGML_FP16_TO_FP32(x[i].d); + + for (int j = 0; j < qk; ++j) { + y[i*qk + j] = x[i].qs[j]*d; + } + } +} + +static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy); +static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy); +static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy); +static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy); +static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy); + +static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = { + [GGML_TYPE_Q4_0] = { + .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q4_0, + .quantize_row_q = quantize_row_q4_0, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference, + .quantize_row_q_dot = quantize_row_q8_0, + .vec_dot_q = ggml_vec_dot_q4_0_q8_0, + .vec_dot_type = GGML_TYPE_Q8_0, + }, + [GGML_TYPE_Q4_1] = { + .dequantize_row_q = (dequantize_row_q_t)dequantize_row_q4_1, + .quantize_row_q = quantize_row_q4_1, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference, + .quantize_row_q_dot = quantize_row_q8_1, + .vec_dot_q = ggml_vec_dot_q4_1_q8_1, + .vec_dot_type = GGML_TYPE_Q8_1, + }, + [GGML_TYPE_Q5_0] = { + .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q5_0, + .quantize_row_q = quantize_row_q5_0, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q5_0_reference, + .quantize_row_q_dot = quantize_row_q8_0, + .vec_dot_q = ggml_vec_dot_q5_0_q8_0, + .vec_dot_type = GGML_TYPE_Q8_0, + }, + [GGML_TYPE_Q5_1] = { + .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q5_1, + .quantize_row_q = quantize_row_q5_1, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q5_1_reference, + .quantize_row_q_dot = quantize_row_q8_1, + .vec_dot_q = ggml_vec_dot_q5_1_q8_1, + .vec_dot_type = GGML_TYPE_Q8_1, + }, + [GGML_TYPE_Q8_0] = { + .dequantize_row_q = dequantize_row_q8_0, + .quantize_row_q = quantize_row_q8_0, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q8_0_reference, + .quantize_row_q_dot = quantize_row_q8_0, + .vec_dot_q = ggml_vec_dot_q8_0_q8_0, + .vec_dot_type = GGML_TYPE_Q8_0, + }, + [GGML_TYPE_Q8_1] = { + .dequantize_row_q = NULL, // TODO + .quantize_row_q = quantize_row_q8_1, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q8_1_reference, + .quantize_row_q_dot = quantize_row_q8_1, + .vec_dot_q = NULL, // TODO + .vec_dot_type = GGML_TYPE_Q8_1, + }, +#ifdef GGML_USE_K_QUANTS + [GGML_TYPE_Q2_K] = { + .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q2_K, + .quantize_row_q = quantize_row_q2_K, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q2_K_reference, + .quantize_row_q_dot = quantize_row_q8_K, + .vec_dot_q = ggml_vec_dot_q2_K_q8_K, + .vec_dot_type = GGML_TYPE_Q8_K, + }, + [GGML_TYPE_Q3_K] = { + .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q3_K, + .quantize_row_q = quantize_row_q3_K, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q3_K_reference, + .quantize_row_q_dot = quantize_row_q8_K, + .vec_dot_q = ggml_vec_dot_q3_K_q8_K, + .vec_dot_type = GGML_TYPE_Q8_K, + }, + [GGML_TYPE_Q4_K] = { + .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q4_K, + .quantize_row_q = quantize_row_q4_K, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_K_reference, + .quantize_row_q_dot = quantize_row_q8_K, + .vec_dot_q = ggml_vec_dot_q4_K_q8_K, + .vec_dot_type = GGML_TYPE_Q8_K, + }, + [GGML_TYPE_Q5_K] = { + .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q5_K, + .quantize_row_q = quantize_row_q5_K, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q5_K_reference, + .quantize_row_q_dot = quantize_row_q8_K, + .vec_dot_q = ggml_vec_dot_q5_K_q8_K, + .vec_dot_type = GGML_TYPE_Q8_K, + }, + [GGML_TYPE_Q6_K] = { + .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q6_K, + .quantize_row_q = quantize_row_q6_K, + .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q6_K_reference, + .quantize_row_q_dot = quantize_row_q8_K, + .vec_dot_q = ggml_vec_dot_q6_K_q8_K, + .vec_dot_type = GGML_TYPE_Q8_K, + }, +#endif +}; + +// For internal test use +quantize_fns_t ggml_internal_get_quantize_fn(size_t i) { + GGML_ASSERT(i < GGML_TYPE_COUNT); + return quantize_fns[i]; +} + + +// +// simd mappings +// + +// we define a common set of C macros which map to specific intrinsics based on the current architecture +// we then implement the fundamental computation operations below using only these macros +// adding support for new architectures requires to define the corresponding SIMD macros +// +// GGML_F32_STEP / GGML_F16_STEP +// number of elements to process in a single step +// +// GGML_F32_EPR / GGML_F16_EPR +// number of elements to fit in a single register +// + +#if defined(__ARM_NEON) && defined(__ARM_FEATURE_FMA) + +#define GGML_SIMD + +// F32 NEON + +#define GGML_F32_STEP 16 +#define GGML_F32_EPR 4 + +#define GGML_F32x4 float32x4_t +#define GGML_F32x4_ZERO vdupq_n_f32(0.0f) +#define GGML_F32x4_SET1(x) vdupq_n_f32(x) +#define GGML_F32x4_LOAD vld1q_f32 +#define GGML_F32x4_STORE vst1q_f32 +#define GGML_F32x4_FMA(a, b, c) vfmaq_f32(a, b, c) +#define GGML_F32x4_ADD vaddq_f32 +#define GGML_F32x4_MUL vmulq_f32 +#define GGML_F32x4_REDUCE_ONE(x) vaddvq_f32(x) +#define GGML_F32x4_REDUCE(res, x) \ +{ \ + for (int i = 0; i < GGML_F32_ARR/2; ++i) { \ + x[2*i] = vaddq_f32(x[2*i], x[2*i+1]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/4; ++i) { \ + x[4*i] = vaddq_f32(x[4*i], x[4*i+2]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/8; ++i) { \ + x[8*i] = vaddq_f32(x[8*i], x[8*i+4]); \ + } \ + res = GGML_F32x4_REDUCE_ONE(x[0]); \ +} + +#define GGML_F32_VEC GGML_F32x4 +#define GGML_F32_VEC_ZERO GGML_F32x4_ZERO +#define GGML_F32_VEC_SET1 GGML_F32x4_SET1 +#define GGML_F32_VEC_LOAD GGML_F32x4_LOAD +#define GGML_F32_VEC_STORE GGML_F32x4_STORE +#define GGML_F32_VEC_FMA GGML_F32x4_FMA +#define GGML_F32_VEC_ADD GGML_F32x4_ADD +#define GGML_F32_VEC_MUL GGML_F32x4_MUL +#define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE + +// F16 NEON + +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #define GGML_F16_STEP 32 + #define GGML_F16_EPR 8 + + #define GGML_F16x8 float16x8_t + #define GGML_F16x8_ZERO vdupq_n_f16(0.0f) + #define GGML_F16x8_SET1(x) vdupq_n_f16(x) + #define GGML_F16x8_LOAD vld1q_f16 + #define GGML_F16x8_STORE vst1q_f16 + #define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c) + #define GGML_F16x8_ADD vaddq_f16 + #define GGML_F16x8_MUL vmulq_f16 + #define GGML_F16x8_REDUCE(res, x) \ + { \ + for (int i = 0; i < GGML_F16_ARR/2; ++i) { \ + x[2*i] = vaddq_f16(x[2*i], x[2*i+1]); \ + } \ + for (int i = 0; i < GGML_F16_ARR/4; ++i) { \ + x[4*i] = vaddq_f16(x[4*i], x[4*i+2]); \ + } \ + for (int i = 0; i < GGML_F16_ARR/8; ++i) { \ + x[8*i] = vaddq_f16(x[8*i], x[8*i+4]); \ + } \ + const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 (x[0])); \ + const float32x4_t t1 = vcvt_f32_f16(vget_high_f16(x[0])); \ + res = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); \ + } + + #define GGML_F16_VEC GGML_F16x8 + #define GGML_F16_VEC_ZERO GGML_F16x8_ZERO + #define GGML_F16_VEC_SET1 GGML_F16x8_SET1 + #define GGML_F16_VEC_LOAD(p, i) GGML_F16x8_LOAD(p) + #define GGML_F16_VEC_STORE(p, r, i) GGML_F16x8_STORE(p, r[i]) + #define GGML_F16_VEC_FMA GGML_F16x8_FMA + #define GGML_F16_VEC_ADD GGML_F16x8_ADD + #define GGML_F16_VEC_MUL GGML_F16x8_MUL + #define GGML_F16_VEC_REDUCE GGML_F16x8_REDUCE +#else + // if FP16 vector arithmetic is not supported, we use FP32 instead + // and take advantage of the vcvt_ functions to convert to/from FP16 + + #define GGML_F16_STEP 16 + #define GGML_F16_EPR 4 + + #define GGML_F32Cx4 float32x4_t + #define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f) + #define GGML_F32Cx4_SET1(x) vdupq_n_f32(x) + #define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16(x)) + #define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y)) + #define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c) + #define GGML_F32Cx4_ADD vaddq_f32 + #define GGML_F32Cx4_MUL vmulq_f32 + #define GGML_F32Cx4_REDUCE GGML_F32x4_REDUCE + + #define GGML_F16_VEC GGML_F32Cx4 + #define GGML_F16_VEC_ZERO GGML_F32Cx4_ZERO + #define GGML_F16_VEC_SET1 GGML_F32Cx4_SET1 + #define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx4_LOAD(p) + #define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx4_STORE(p, r[i]) + #define GGML_F16_VEC_FMA GGML_F32Cx4_FMA + #define GGML_F16_VEC_ADD GGML_F32Cx4_ADD + #define GGML_F16_VEC_MUL GGML_F32Cx4_MUL + #define GGML_F16_VEC_REDUCE GGML_F32Cx4_REDUCE +#endif + +#elif defined(__AVX__) + +#define GGML_SIMD + +// F32 AVX + +#define GGML_F32_STEP 32 +#define GGML_F32_EPR 8 + +#define GGML_F32x8 __m256 +#define GGML_F32x8_ZERO _mm256_setzero_ps() +#define GGML_F32x8_SET1(x) _mm256_set1_ps(x) +#define GGML_F32x8_LOAD _mm256_loadu_ps +#define GGML_F32x8_STORE _mm256_storeu_ps +#if defined(__FMA__) + #define GGML_F32x8_FMA(a, b, c) _mm256_fmadd_ps(b, c, a) +#else + #define GGML_F32x8_FMA(a, b, c) _mm256_add_ps(_mm256_mul_ps(b, c), a) +#endif +#define GGML_F32x8_ADD _mm256_add_ps +#define GGML_F32x8_MUL _mm256_mul_ps +#define GGML_F32x8_REDUCE(res, x) \ +{ \ + for (int i = 0; i < GGML_F32_ARR/2; ++i) { \ + x[2*i] = _mm256_add_ps(x[2*i], x[2*i+1]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/4; ++i) { \ + x[4*i] = _mm256_add_ps(x[4*i], x[4*i+2]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/8; ++i) { \ + x[8*i] = _mm256_add_ps(x[8*i], x[8*i+4]); \ + } \ + const __m128 t0 = _mm_add_ps(_mm256_castps256_ps128(x[0]), \ + _mm256_extractf128_ps(x[0], 1)); \ + const __m128 t1 = _mm_hadd_ps(t0, t0); \ + res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \ +} +// TODO: is this optimal ? + +#define GGML_F32_VEC GGML_F32x8 +#define GGML_F32_VEC_ZERO GGML_F32x8_ZERO +#define GGML_F32_VEC_SET1 GGML_F32x8_SET1 +#define GGML_F32_VEC_LOAD GGML_F32x8_LOAD +#define GGML_F32_VEC_STORE GGML_F32x8_STORE +#define GGML_F32_VEC_FMA GGML_F32x8_FMA +#define GGML_F32_VEC_ADD GGML_F32x8_ADD +#define GGML_F32_VEC_MUL GGML_F32x8_MUL +#define GGML_F32_VEC_REDUCE GGML_F32x8_REDUCE + +// F16 AVX + +#define GGML_F16_STEP 32 +#define GGML_F16_EPR 8 + +// F16 arithmetic is not supported by AVX, so we use F32 instead + +#define GGML_F32Cx8 __m256 +#define GGML_F32Cx8_ZERO _mm256_setzero_ps() +#define GGML_F32Cx8_SET1(x) _mm256_set1_ps(x) + +#if defined(__F16C__) +// the _mm256_cvt intrinsics require F16C +#define GGML_F32Cx8_LOAD(x) _mm256_cvtph_ps(_mm_loadu_si128((__m128i *)(x))) +#define GGML_F32Cx8_STORE(x, y) _mm_storeu_si128((__m128i *)(x), _mm256_cvtps_ph(y, 0)) +#else +static inline __m256 __avx_f32cx8_load(ggml_fp16_t *x) { + float tmp[8]; + + for (int i = 0; i < 8; i++) { + tmp[i] = GGML_FP16_TO_FP32(x[i]); + } + + return _mm256_loadu_ps(tmp); +} +static inline void __avx_f32cx8_store(ggml_fp16_t *x, __m256 y) { + float arr[8]; + + _mm256_storeu_ps(arr, y); + + for (int i = 0; i < 8; i++) + x[i] = GGML_FP32_TO_FP16(arr[i]); +} +#define GGML_F32Cx8_LOAD(x) __avx_f32cx8_load(x) +#define GGML_F32Cx8_STORE(x, y) __avx_f32cx8_store(x, y) +#endif + +#define GGML_F32Cx8_FMA GGML_F32x8_FMA +#define GGML_F32Cx8_ADD _mm256_add_ps +#define GGML_F32Cx8_MUL _mm256_mul_ps +#define GGML_F32Cx8_REDUCE GGML_F32x8_REDUCE + +#define GGML_F16_VEC GGML_F32Cx8 +#define GGML_F16_VEC_ZERO GGML_F32Cx8_ZERO +#define GGML_F16_VEC_SET1 GGML_F32Cx8_SET1 +#define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx8_LOAD(p) +#define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx8_STORE(p, r[i]) +#define GGML_F16_VEC_FMA GGML_F32Cx8_FMA +#define GGML_F16_VEC_ADD GGML_F32Cx8_ADD +#define GGML_F16_VEC_MUL GGML_F32Cx8_MUL +#define GGML_F16_VEC_REDUCE GGML_F32Cx8_REDUCE + +#elif defined(__POWER9_VECTOR__) + +#define GGML_SIMD + +// F32 POWER9 + +#define GGML_F32_STEP 32 +#define GGML_F32_EPR 4 + +#define GGML_F32x4 vector float +#define GGML_F32x4_ZERO 0.0f +#define GGML_F32x4_SET1 vec_splats +#define GGML_F32x4_LOAD(p) vec_xl(0, p) +#define GGML_F32x4_STORE(p, r) vec_xst(r, 0, p) +#define GGML_F32x4_FMA(a, b, c) vec_madd(b, c, a) +#define GGML_F32x4_ADD vec_add +#define GGML_F32x4_MUL vec_mul +#define GGML_F32x4_REDUCE(res, x) \ +{ \ + for (int i = 0; i < GGML_F32_ARR/2; ++i) { \ + x[2*i] = vec_add(x[2*i], x[2*i+1]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/4; ++i) { \ + x[4*i] = vec_add(x[4*i], x[4*i+2]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/8; ++i) { \ + x[8*i] = vec_add(x[8*i], x[8*i+4]); \ + } \ + res = vec_extract(x[0], 0) + \ + vec_extract(x[0], 1) + \ + vec_extract(x[0], 2) + \ + vec_extract(x[0], 3); \ +} + +#define GGML_F32_VEC GGML_F32x4 +#define GGML_F32_VEC_ZERO GGML_F32x4_ZERO +#define GGML_F32_VEC_SET1 GGML_F32x4_SET1 +#define GGML_F32_VEC_LOAD GGML_F32x4_LOAD +#define GGML_F32_VEC_STORE GGML_F32x4_STORE +#define GGML_F32_VEC_FMA GGML_F32x4_FMA +#define GGML_F32_VEC_ADD GGML_F32x4_ADD +#define GGML_F32_VEC_MUL GGML_F32x4_MUL +#define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE + +// F16 POWER9 +#define GGML_F16_STEP GGML_F32_STEP +#define GGML_F16_EPR GGML_F32_EPR +#define GGML_F16_VEC GGML_F32x4 +#define GGML_F16_VEC_ZERO GGML_F32x4_ZERO +#define GGML_F16_VEC_SET1 GGML_F32x4_SET1 +#define GGML_F16_VEC_FMA GGML_F32x4_FMA +#define GGML_F16_VEC_REDUCE GGML_F32x4_REDUCE +// Use vec_xl, not vec_ld, in case the load address is not aligned. +#define GGML_F16_VEC_LOAD(p, i) (i & 0x1) ? \ + vec_extract_fp32_from_shorth(vec_xl(0, p - GGML_F16_EPR)) : \ + vec_extract_fp32_from_shortl(vec_xl(0, p)) +#define GGML_ENDIAN_BYTE(i) ((unsigned char *)&(uint16_t){1})[i] +#define GGML_F16_VEC_STORE(p, r, i) \ + if (i & 0x1) \ + vec_xst(vec_pack_to_short_fp32(r[i - GGML_ENDIAN_BYTE(1)], \ + r[i - GGML_ENDIAN_BYTE(0)]), \ + 0, p - GGML_F16_EPR) + +#elif defined(__wasm_simd128__) + +#define GGML_SIMD + +// F32 WASM + +#define GGML_F32_STEP 16 +#define GGML_F32_EPR 4 + +#define GGML_F32x4 v128_t +#define GGML_F32x4_ZERO wasm_f32x4_splat(0.0f) +#define GGML_F32x4_SET1(x) wasm_f32x4_splat(x) +#define GGML_F32x4_LOAD wasm_v128_load +#define GGML_F32x4_STORE wasm_v128_store +#define GGML_F32x4_FMA(a, b, c) wasm_f32x4_add(wasm_f32x4_mul(b, c), a) +#define GGML_F32x4_ADD wasm_f32x4_add +#define GGML_F32x4_MUL wasm_f32x4_mul +#define GGML_F32x4_REDUCE(res, x) \ +{ \ + for (int i = 0; i < GGML_F32_ARR/2; ++i) { \ + x[2*i] = wasm_f32x4_add(x[2*i], x[2*i+1]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/4; ++i) { \ + x[4*i] = wasm_f32x4_add(x[4*i], x[4*i+2]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/8; ++i) { \ + x[8*i] = wasm_f32x4_add(x[8*i], x[8*i+4]); \ + } \ + res = wasm_f32x4_extract_lane(x[0], 0) + \ + wasm_f32x4_extract_lane(x[0], 1) + \ + wasm_f32x4_extract_lane(x[0], 2) + \ + wasm_f32x4_extract_lane(x[0], 3); \ +} + +#define GGML_F32_VEC GGML_F32x4 +#define GGML_F32_VEC_ZERO GGML_F32x4_ZERO +#define GGML_F32_VEC_SET1 GGML_F32x4_SET1 +#define GGML_F32_VEC_LOAD GGML_F32x4_LOAD +#define GGML_F32_VEC_STORE GGML_F32x4_STORE +#define GGML_F32_VEC_FMA GGML_F32x4_FMA +#define GGML_F32_VEC_ADD GGML_F32x4_ADD +#define GGML_F32_VEC_MUL GGML_F32x4_MUL +#define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE + +// F16 WASM + +#define GGML_F16_STEP 16 +#define GGML_F16_EPR 4 + +inline static v128_t __wasm_f16x4_load(const ggml_fp16_t * p) { + float tmp[4]; + + tmp[0] = GGML_FP16_TO_FP32(p[0]); + tmp[1] = GGML_FP16_TO_FP32(p[1]); + tmp[2] = GGML_FP16_TO_FP32(p[2]); + tmp[3] = GGML_FP16_TO_FP32(p[3]); + + return wasm_v128_load(tmp); +} + +inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) { + float tmp[4]; + + wasm_v128_store(tmp, x); + + p[0] = GGML_FP32_TO_FP16(tmp[0]); + p[1] = GGML_FP32_TO_FP16(tmp[1]); + p[2] = GGML_FP32_TO_FP16(tmp[2]); + p[3] = GGML_FP32_TO_FP16(tmp[3]); +} + +#define GGML_F16x4 v128_t +#define GGML_F16x4_ZERO wasm_f32x4_splat(0.0f) +#define GGML_F16x4_SET1(x) wasm_f32x4_splat(x) +#define GGML_F16x4_LOAD(x) __wasm_f16x4_load(x) +#define GGML_F16x4_STORE(x, y) __wasm_f16x4_store(x, y) +#define GGML_F16x4_FMA GGML_F32x4_FMA +#define GGML_F16x4_ADD wasm_f32x4_add +#define GGML_F16x4_MUL wasm_f32x4_mul +#define GGML_F16x4_REDUCE(res, x) \ +{ \ + for (int i = 0; i < GGML_F16_ARR/2; ++i) { \ + x[2*i] = wasm_f32x4_add(x[2*i], x[2*i+1]); \ + } \ + for (int i = 0; i < GGML_F16_ARR/4; ++i) { \ + x[4*i] = wasm_f32x4_add(x[4*i], x[4*i+2]); \ + } \ + for (int i = 0; i < GGML_F16_ARR/8; ++i) { \ + x[8*i] = wasm_f32x4_add(x[8*i], x[8*i+4]); \ + } \ + res = wasm_f32x4_extract_lane(x[0], 0) + \ + wasm_f32x4_extract_lane(x[0], 1) + \ + wasm_f32x4_extract_lane(x[0], 2) + \ + wasm_f32x4_extract_lane(x[0], 3); \ +} + +#define GGML_F16_VEC GGML_F16x4 +#define GGML_F16_VEC_ZERO GGML_F16x4_ZERO +#define GGML_F16_VEC_SET1 GGML_F16x4_SET1 +#define GGML_F16_VEC_LOAD(p, i) GGML_F16x4_LOAD(p) +#define GGML_F16_VEC_STORE(p, r, i) GGML_F16x4_STORE(p, r[i]) +#define GGML_F16_VEC_FMA GGML_F16x4_FMA +#define GGML_F16_VEC_ADD GGML_F16x4_ADD +#define GGML_F16_VEC_MUL GGML_F16x4_MUL +#define GGML_F16_VEC_REDUCE GGML_F16x4_REDUCE + +#elif defined(__SSE3__) + +#define GGML_SIMD + +// F32 SSE + +#define GGML_F32_STEP 32 +#define GGML_F32_EPR 4 + +#define GGML_F32x4 __m128 +#define GGML_F32x4_ZERO _mm_setzero_ps() +#define GGML_F32x4_SET1(x) _mm_set1_ps(x) +#define GGML_F32x4_LOAD _mm_loadu_ps +#define GGML_F32x4_STORE _mm_storeu_ps +#if defined(__FMA__) + // TODO: Does this work? + #define GGML_F32x4_FMA(a, b, c) _mm_fmadd_ps(b, c, a) +#else + #define GGML_F32x4_FMA(a, b, c) _mm_add_ps(_mm_mul_ps(b, c), a) +#endif +#define GGML_F32x4_ADD _mm_add_ps +#define GGML_F32x4_MUL _mm_mul_ps +#define GGML_F32x4_REDUCE(res, x) \ +{ \ + for (int i = 0; i < GGML_F32_ARR/2; ++i) { \ + x[2*i] = _mm_add_ps(x[2*i], x[2*i+1]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/4; ++i) { \ + x[4*i] = _mm_add_ps(x[4*i], x[4*i+2]); \ + } \ + for (int i = 0; i < GGML_F32_ARR/8; ++i) { \ + x[8*i] = _mm_add_ps(x[8*i], x[8*i+4]); \ + } \ + const __m128 t0 = _mm_hadd_ps(x[0], x[0]); \ + res = _mm_cvtss_f32(_mm_hadd_ps(t0, t0)); \ +} +// TODO: is this optimal ? + +#define GGML_F32_VEC GGML_F32x4 +#define GGML_F32_VEC_ZERO GGML_F32x4_ZERO +#define GGML_F32_VEC_SET1 GGML_F32x4_SET1 +#define GGML_F32_VEC_LOAD GGML_F32x4_LOAD +#define GGML_F32_VEC_STORE GGML_F32x4_STORE +#define GGML_F32_VEC_FMA GGML_F32x4_FMA +#define GGML_F32_VEC_ADD GGML_F32x4_ADD +#define GGML_F32_VEC_MUL GGML_F32x4_MUL +#define GGML_F32_VEC_REDUCE GGML_F32x4_REDUCE + +// F16 SSE + +#define GGML_F16_STEP 32 +#define GGML_F16_EPR 4 + +static inline __m128 __sse_f16x4_load(ggml_fp16_t *x) { + float tmp[4]; + + tmp[0] = GGML_FP16_TO_FP32(x[0]); + tmp[1] = GGML_FP16_TO_FP32(x[1]); + tmp[2] = GGML_FP16_TO_FP32(x[2]); + tmp[3] = GGML_FP16_TO_FP32(x[3]); + + return _mm_loadu_ps(tmp); +} + +static inline void __sse_f16x4_store(ggml_fp16_t *x, __m128 y) { + float arr[4]; + + _mm_storeu_ps(arr, y); + + x[0] = GGML_FP32_TO_FP16(arr[0]); + x[1] = GGML_FP32_TO_FP16(arr[1]); + x[2] = GGML_FP32_TO_FP16(arr[2]); + x[3] = GGML_FP32_TO_FP16(arr[3]); +} + +#define GGML_F32Cx4 __m128 +#define GGML_F32Cx4_ZERO _mm_setzero_ps() +#define GGML_F32Cx4_SET1(x) _mm_set1_ps(x) +#define GGML_F32Cx4_LOAD(x) __sse_f16x4_load(x) +#define GGML_F32Cx4_STORE(x, y) __sse_f16x4_store(x, y) +#define GGML_F32Cx4_FMA GGML_F32x4_FMA +#define GGML_F32Cx4_ADD _mm_add_ps +#define GGML_F32Cx4_MUL _mm_mul_ps +#define GGML_F32Cx4_REDUCE GGML_F32x4_REDUCE + +#define GGML_F16_VEC GGML_F32Cx4 +#define GGML_F16_VEC_ZERO GGML_F32Cx4_ZERO +#define GGML_F16_VEC_SET1 GGML_F32Cx4_SET1 +#define GGML_F16_VEC_LOAD(p, i) GGML_F32Cx4_LOAD(p) +#define GGML_F16_VEC_STORE(p, r, i) GGML_F32Cx4_STORE(p, r[i]) +#define GGML_F16_VEC_FMA GGML_F32Cx4_FMA +#define GGML_F16_VEC_ADD GGML_F32Cx4_ADD +#define GGML_F16_VEC_MUL GGML_F32Cx4_MUL +#define GGML_F16_VEC_REDUCE GGML_F32Cx4_REDUCE + +#endif + +// GGML_F32_ARR / GGML_F16_ARR +// number of registers to use per step +#ifdef GGML_SIMD +#define GGML_F32_ARR (GGML_F32_STEP/GGML_F32_EPR) +#define GGML_F16_ARR (GGML_F16_STEP/GGML_F16_EPR) +#endif + +// +// fundamental operations +// + +inline static void ggml_vec_set_i8(const int n, int8_t * x, const int8_t v) { for (int i = 0; i < n; ++i) x[i] = v; } + +inline static void ggml_vec_set_i16(const int n, int16_t * x, const int16_t v) { for (int i = 0; i < n; ++i) x[i] = v; } + +inline static void ggml_vec_set_i32(const int n, int32_t * x, const int32_t v) { for (int i = 0; i < n; ++i) x[i] = v; } + +inline static void ggml_vec_set_f16(const int n, ggml_fp16_t * x, const int32_t v) { for (int i = 0; i < n; ++i) x[i] = v; } + +inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; } +inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { for (int i = 0; i < n; ++i) z[i] = x[i] + v; } +inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] += x[i]; } +inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] += v; } +inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] - y[i]; } +inline static void ggml_vec_set_f32 (const int n, float * x, const float v) { for (int i = 0; i < n; ++i) x[i] = v; } +inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]; } +inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; } +inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; } +inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; } + +inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y) { +#ifdef GGML_SIMD + float sumf = 0.0f; + const int np = (n & ~(GGML_F32_STEP - 1)); + + GGML_F32_VEC sum[GGML_F32_ARR] = { GGML_F32_VEC_ZERO }; + + GGML_F32_VEC ax[GGML_F32_ARR]; + GGML_F32_VEC ay[GGML_F32_ARR]; + + for (int i = 0; i < np; i += GGML_F32_STEP) { + for (int j = 0; j < GGML_F32_ARR; j++) { + ax[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR); + ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR); + + sum[j] = GGML_F32_VEC_FMA(sum[j], ax[j], ay[j]); + } + } + + // reduce sum0..sum3 to sum0 + GGML_F32_VEC_REDUCE(sumf, sum); + + // leftovers + for (int i = np; i < n; ++i) { + sumf += x[i]*y[i]; + } +#else + // scalar + ggml_float sumf = 0.0; + for (int i = 0; i < n; ++i) { + sumf += (ggml_float)(x[i]*y[i]); + } +#endif + + *s = sumf; +} + +inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y) { + ggml_float sumf = 0.0; + +#if defined(GGML_SIMD) + const int np = (n & ~(GGML_F16_STEP - 1)); + + GGML_F16_VEC sum[GGML_F16_ARR] = { GGML_F16_VEC_ZERO }; + + GGML_F16_VEC ax[GGML_F16_ARR]; + GGML_F16_VEC ay[GGML_F16_ARR]; + + for (int i = 0; i < np; i += GGML_F16_STEP) { + for (int j = 0; j < GGML_F16_ARR; j++) { + ax[j] = GGML_F16_VEC_LOAD(x + i + j*GGML_F16_EPR, j); + ay[j] = GGML_F16_VEC_LOAD(y + i + j*GGML_F16_EPR, j); + + sum[j] = GGML_F16_VEC_FMA(sum[j], ax[j], ay[j]); + } + } + + // reduce sum0..sum3 to sum0 + GGML_F16_VEC_REDUCE(sumf, sum); + + // leftovers + for (int i = np; i < n; ++i) { + sumf += (ggml_float)(GGML_FP16_TO_FP32(x[i])*GGML_FP16_TO_FP32(y[i])); + } +#else + for (int i = 0; i < n; ++i) { + sumf += (ggml_float)(GGML_FP16_TO_FP32(x[i])*GGML_FP16_TO_FP32(y[i])); + } +#endif + + *s = sumf; +} + +static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + const int qk = QK8_0; + const int nb = n / qk; + + assert(n % qk == 0); + assert(nb % 2 == 0); + + const block_q4_0 * restrict x = vx; + const block_q8_0 * restrict y = vy; + +#if defined(__ARM_NEON) + float32x4_t sumv0 = vdupq_n_f32(0.0f); + float32x4_t sumv1 = vdupq_n_f32(0.0f); + + for (int i = 0; i < nb; i += 2) { + const block_q4_0 * restrict x0 = &x[i + 0]; + const block_q4_0 * restrict x1 = &x[i + 1]; + const block_q8_0 * restrict y0 = &y[i + 0]; + const block_q8_0 * restrict y1 = &y[i + 1]; + + const uint8x16_t m4b = vdupq_n_u8(0x0F); + const int8x16_t s8b = vdupq_n_s8(0x8); + + const uint8x16_t v0_0 = vld1q_u8(x0->qs); + const uint8x16_t v0_1 = vld1q_u8(x1->qs); + + // 4-bit -> 8-bit + const int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); + const int8x16_t v0_0h = vreinterpretq_s8_u8(vshrq_n_u8(v0_0, 4)); + const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); + const int8x16_t v0_1h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4)); + + // sub 8 + const int8x16_t v0_0ls = vsubq_s8(v0_0l, s8b); + const int8x16_t v0_0hs = vsubq_s8(v0_0h, s8b); + const int8x16_t v0_1ls = vsubq_s8(v0_1l, s8b); + const int8x16_t v0_1hs = vsubq_s8(v0_1h, s8b); + + // load y + const int8x16_t v1_0l = vld1q_s8(y0->qs); + const int8x16_t v1_0h = vld1q_s8(y0->qs + 16); + const int8x16_t v1_1l = vld1q_s8(y1->qs); + const int8x16_t v1_1h = vld1q_s8(y1->qs + 16); + +#if defined(__ARM_FEATURE_DOTPROD) + // dot product into int32x4_t + const int32x4_t p_0 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_0ls, v1_0l), v0_0hs, v1_0h); + const int32x4_t p_1 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_1ls, v1_1l), v0_1hs, v1_1h); + + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); +#else + const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0ls), vget_low_s8 (v1_0l)); + const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0ls), vget_high_s8(v1_0l)); + const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hs), vget_low_s8 (v1_0h)); + const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0hs), vget_high_s8(v1_0h)); + + const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1ls), vget_low_s8 (v1_1l)); + const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1ls), vget_high_s8(v1_1l)); + const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1hs), vget_low_s8 (v1_1h)); + const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1hs), vget_high_s8(v1_1h)); + + const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h)); + const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h)); + const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h)); + const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h)); + + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); +#endif + } + + *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); +#elif defined(__AVX2__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + + // Main loop + for (int i = 0; i < nb; ++i) { + /* Compute combined scale for the block */ + const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) ); + + __m256i bx = bytes_from_nibbles_32(x[i].qs); + + // Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval. + const __m256i off = _mm256_set1_epi8( 8 ); + bx = _mm256_sub_epi8( bx, off ); + + __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs); + + const __m256 q = mul_sum_i8_pairs_float(bx, by); + + /* Multiply q with scale and accumulate */ + acc = _mm256_fmadd_ps( d, q, acc ); + } + + *s = hsum_float_8(acc); +#elif defined(__AVX__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + + // Main loop + for (int i = 0; i < nb; ++i) { + // Compute combined scale for the block + const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) ); + + const __m128i lowMask = _mm_set1_epi8(0xF); + const __m128i off = _mm_set1_epi8(8); + + const __m128i tmp = _mm_loadu_si128((const __m128i *)x[i].qs); + + __m128i bx = _mm_and_si128(lowMask, tmp); + __m128i by = _mm_loadu_si128((const __m128i *)y[i].qs); + bx = _mm_sub_epi8(bx, off); + const __m128i i32_0 = mul_sum_i8_pairs(bx, by); + + bx = _mm_and_si128(lowMask, _mm_srli_epi64(tmp, 4)); + by = _mm_loadu_si128((const __m128i *)(y[i].qs + 16)); + bx = _mm_sub_epi8(bx, off); + const __m128i i32_1 = mul_sum_i8_pairs(bx, by); + + // Convert int32_t to float + __m256 p = _mm256_cvtepi32_ps(MM256_SET_M128I(i32_0, i32_1)); + + // Apply the scale, and accumulate + acc = _mm256_add_ps(_mm256_mul_ps( d, p ), acc); + } + + *s = hsum_float_8(acc); +#elif defined(__SSSE3__) + // set constants + const __m128i lowMask = _mm_set1_epi8(0xF); + const __m128i off = _mm_set1_epi8(8); + + // Initialize accumulator with zeros + __m128 acc_0 = _mm_setzero_ps(); + __m128 acc_1 = _mm_setzero_ps(); + __m128 acc_2 = _mm_setzero_ps(); + __m128 acc_3 = _mm_setzero_ps(); + + // First round without accumulation + { + _mm_prefetch(&x[0] + sizeof(block_q4_0), _MM_HINT_T0); + _mm_prefetch(&y[0] + sizeof(block_q8_0), _MM_HINT_T0); + + // Compute combined scale for the block 0 and 1 + const __m128 d_0_1 = _mm_set1_ps( GGML_FP16_TO_FP32(x[0].d) * GGML_FP16_TO_FP32(y[0].d) ); + + const __m128i tmp_0_1 = _mm_loadu_si128((const __m128i *)x[0].qs); + + __m128i bx_0 = _mm_and_si128(lowMask, tmp_0_1); + __m128i by_0 = _mm_loadu_si128((const __m128i *)y[0].qs); + bx_0 = _mm_sub_epi8(bx_0, off); + const __m128i i32_0 = mul_sum_i8_pairs(bx_0, by_0); + + __m128i bx_1 = _mm_and_si128(lowMask, _mm_srli_epi64(tmp_0_1, 4)); + __m128i by_1 = _mm_loadu_si128((const __m128i *)(y[0].qs + 16)); + bx_1 = _mm_sub_epi8(bx_1, off); + const __m128i i32_1 = mul_sum_i8_pairs(bx_1, by_1); + + _mm_prefetch(&x[1] + sizeof(block_q4_0), _MM_HINT_T0); + _mm_prefetch(&y[1] + sizeof(block_q8_0), _MM_HINT_T0); + + // Compute combined scale for the block 2 and 3 + const __m128 d_2_3 = _mm_set1_ps( GGML_FP16_TO_FP32(x[1].d) * GGML_FP16_TO_FP32(y[1].d) ); + + const __m128i tmp_2_3 = _mm_loadu_si128((const __m128i *)x[1].qs); + + __m128i bx_2 = _mm_and_si128(lowMask, tmp_2_3); + __m128i by_2 = _mm_loadu_si128((const __m128i *)y[1].qs); + bx_2 = _mm_sub_epi8(bx_2, off); + const __m128i i32_2 = mul_sum_i8_pairs(bx_2, by_2); + + __m128i bx_3 = _mm_and_si128(lowMask, _mm_srli_epi64(tmp_2_3, 4)); + __m128i by_3 = _mm_loadu_si128((const __m128i *)(y[1].qs + 16)); + bx_3 = _mm_sub_epi8(bx_3, off); + const __m128i i32_3 = mul_sum_i8_pairs(bx_3, by_3); + + // Convert int32_t to float + __m128 p0 = _mm_cvtepi32_ps(i32_0); + __m128 p1 = _mm_cvtepi32_ps(i32_1); + __m128 p2 = _mm_cvtepi32_ps(i32_2); + __m128 p3 = _mm_cvtepi32_ps(i32_3); + + // Apply the scale + acc_0 = _mm_mul_ps( d_0_1, p0 ); + acc_1 = _mm_mul_ps( d_0_1, p1 ); + acc_2 = _mm_mul_ps( d_2_3, p2 ); + acc_3 = _mm_mul_ps( d_2_3, p3 ); + } + + // Main loop + for (int i = 2; i < nb; i+=2) { + _mm_prefetch(&x[i] + sizeof(block_q4_0), _MM_HINT_T0); + _mm_prefetch(&y[i] + sizeof(block_q8_0), _MM_HINT_T0); + + // Compute combined scale for the block 0 and 1 + const __m128 d_0_1 = _mm_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) ); + + const __m128i tmp_0_1 = _mm_loadu_si128((const __m128i *)x[i].qs); + + __m128i bx_0 = _mm_and_si128(lowMask, tmp_0_1); + __m128i by_0 = _mm_loadu_si128((const __m128i *)y[i].qs); + bx_0 = _mm_sub_epi8(bx_0, off); + const __m128i i32_0 = mul_sum_i8_pairs(bx_0, by_0); + + __m128i bx_1 = _mm_and_si128(lowMask, _mm_srli_epi64(tmp_0_1, 4)); + __m128i by_1 = _mm_loadu_si128((const __m128i *)(y[i].qs + 16)); + bx_1 = _mm_sub_epi8(bx_1, off); + const __m128i i32_1 = mul_sum_i8_pairs(bx_1, by_1); + + _mm_prefetch(&x[i] + 2 * sizeof(block_q4_0), _MM_HINT_T0); + _mm_prefetch(&y[i] + 2 * sizeof(block_q8_0), _MM_HINT_T0); + + // Compute combined scale for the block 2 and 3 + const __m128 d_2_3 = _mm_set1_ps( GGML_FP16_TO_FP32(x[i + 1].d) * GGML_FP16_TO_FP32(y[i + 1].d) ); + + const __m128i tmp_2_3 = _mm_loadu_si128((const __m128i *)x[i + 1].qs); + + __m128i bx_2 = _mm_and_si128(lowMask, tmp_2_3); + __m128i by_2 = _mm_loadu_si128((const __m128i *)y[i + 1].qs); + bx_2 = _mm_sub_epi8(bx_2, off); + const __m128i i32_2 = mul_sum_i8_pairs(bx_2, by_2); + + __m128i bx_3 = _mm_and_si128(lowMask, _mm_srli_epi64(tmp_2_3, 4)); + __m128i by_3 = _mm_loadu_si128((const __m128i *)(y[i + 1].qs + 16)); + bx_3 = _mm_sub_epi8(bx_3, off); + const __m128i i32_3 = mul_sum_i8_pairs(bx_3, by_3); + + // Convert int32_t to float + __m128 p0 = _mm_cvtepi32_ps(i32_0); + __m128 p1 = _mm_cvtepi32_ps(i32_1); + __m128 p2 = _mm_cvtepi32_ps(i32_2); + __m128 p3 = _mm_cvtepi32_ps(i32_3); + + // Apply the scale + __m128 p0_d = _mm_mul_ps( d_0_1, p0 ); + __m128 p1_d = _mm_mul_ps( d_0_1, p1 ); + __m128 p2_d = _mm_mul_ps( d_2_3, p2 ); + __m128 p3_d = _mm_mul_ps( d_2_3, p3 ); + + // Acummulate + acc_0 = _mm_add_ps(p0_d, acc_0); + acc_1 = _mm_add_ps(p1_d, acc_1); + acc_2 = _mm_add_ps(p2_d, acc_2); + acc_3 = _mm_add_ps(p3_d, acc_3); + } + + *s = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3); +#else + // scalar + float sumf = 0.0; + + for (int i = 0; i < nb; i++) { + int sumi = 0; + + for (int j = 0; j < qk/2; ++j) { + const int v0 = (x[i].qs[j] & 0x0F) - 8; + const int v1 = (x[i].qs[j] >> 4) - 8; + + sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]); + } + + sumf += sumi*GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d); + } + + *s = sumf; +#endif +} + +static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + const int qk = QK8_1; + const int nb = n / qk; + + assert(n % qk == 0); + assert(nb % 2 == 0); + + const block_q4_1 * restrict x = vx; + const block_q8_1 * restrict y = vy; + + // TODO: add WASM SIMD +#if defined(__ARM_NEON) + float32x4_t sumv0 = vdupq_n_f32(0.0f); + float32x4_t sumv1 = vdupq_n_f32(0.0f); + + float summs = 0; + + for (int i = 0; i < nb; i += 2) { + const block_q4_1 * restrict x0 = &x[i + 0]; + const block_q4_1 * restrict x1 = &x[i + 1]; + const block_q8_1 * restrict y0 = &y[i + 0]; + const block_q8_1 * restrict y1 = &y[i + 1]; + + summs += GGML_FP16_TO_FP32(x0->m) * y0->s + GGML_FP16_TO_FP32(x1->m) * y1->s; + + const uint8x16_t m4b = vdupq_n_u8(0x0F); + + const uint8x16_t v0_0 = vld1q_u8(x0->qs); + const uint8x16_t v0_1 = vld1q_u8(x1->qs); + + // 4-bit -> 8-bit + const int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); + const int8x16_t v0_0h = vreinterpretq_s8_u8(vshrq_n_u8(v0_0, 4)); + const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); + const int8x16_t v0_1h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4)); + + // load y + const int8x16_t v1_0l = vld1q_s8(y0->qs); + const int8x16_t v1_0h = vld1q_s8(y0->qs + 16); + const int8x16_t v1_1l = vld1q_s8(y1->qs); + const int8x16_t v1_1h = vld1q_s8(y1->qs + 16); + +#if defined(__ARM_FEATURE_DOTPROD) + // dot product into int32x4_t + const int32x4_t p_0 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l), v0_0h, v1_0h); + const int32x4_t p_1 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l), v0_1h, v1_1h); + + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*y0->d); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*y1->d); +#else + const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0l), vget_low_s8 (v1_0l)); + const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0l), vget_high_s8(v1_0l)); + const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0h), vget_low_s8 (v1_0h)); + const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0h), vget_high_s8(v1_0h)); + + const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1l), vget_low_s8 (v1_1l)); + const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1l), vget_high_s8(v1_1l)); + const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1h), vget_low_s8 (v1_1h)); + const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1h), vget_high_s8(v1_1h)); + + const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h)); + const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h)); + const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h)); + const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h)); + + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), GGML_FP16_TO_FP32(x0->d)*y0->d); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), GGML_FP16_TO_FP32(x1->d)*y1->d); +#endif + } + + *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs; +#elif defined(__AVX2__) || defined(__AVX__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + + float summs = 0; + + // Main loop + for (int i = 0; i < nb; ++i) { + const float d0 = GGML_FP16_TO_FP32(x[i].d); + const float d1 = y[i].d; + + summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s; + + const __m256 d0v = _mm256_set1_ps( d0 ); + const __m256 d1v = _mm256_set1_ps( d1 ); + + // Compute combined scales + const __m256 d0d1 = _mm256_mul_ps( d0v, d1v ); + + // Load 16 bytes, and unpack 4 bit fields into bytes, making 32 bytes + const __m256i bx = bytes_from_nibbles_32(x[i].qs); + const __m256i by = _mm256_loadu_si256( (const __m256i *)y[i].qs ); + + const __m256 xy = mul_sum_us8_pairs_float(bx, by); + + // Accumulate d0*d1*x*y +#if defined(__AVX2__) + acc = _mm256_fmadd_ps( d0d1, xy, acc ); +#else + acc = _mm256_add_ps( _mm256_mul_ps( d0d1, xy ), acc ); +#endif + } + + *s = hsum_float_8(acc) + summs; +#else + // scalar + float sumf = 0.0; + + for (int i = 0; i < nb; i++) { + int sumi = 0; + + for (int j = 0; j < qk/2; ++j) { + const int v0 = (x[i].qs[j] & 0x0F); + const int v1 = (x[i].qs[j] >> 4); + + sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]); + } + + sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s; + } + + *s = sumf; +#endif +} + +static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + const int qk = QK8_0; + const int nb = n / qk; + + assert(n % qk == 0); + assert(nb % 2 == 0); + assert(qk == QK5_0); + + const block_q5_0 * restrict x = vx; + const block_q8_0 * restrict y = vy; + +#if defined(__ARM_NEON) + float32x4_t sumv0 = vdupq_n_f32(0.0f); + float32x4_t sumv1 = vdupq_n_f32(0.0f); + + uint32_t qh0; + uint32_t qh1; + + uint64_t tmp0[4]; + uint64_t tmp1[4]; + + for (int i = 0; i < nb; i += 2) { + const block_q5_0 * restrict x0 = &x[i]; + const block_q5_0 * restrict x1 = &x[i + 1]; + const block_q8_0 * restrict y0 = &y[i]; + const block_q8_0 * restrict y1 = &y[i + 1]; + + const uint8x16_t m4b = vdupq_n_u8(0x0F); + + // extract the 5th bit via lookup table ((!b) << 4) + memcpy(&qh0, x0->qh, sizeof(qh0)); + memcpy(&qh1, x1->qh, sizeof(qh1)); + + tmp0[0] = table_b2b_1[(qh0 >> 0) & 0xFF]; + tmp0[1] = table_b2b_1[(qh0 >> 8) & 0xFF]; + tmp0[2] = table_b2b_1[(qh0 >> 16) & 0xFF]; + tmp0[3] = table_b2b_1[(qh0 >> 24) ]; + + tmp1[0] = table_b2b_1[(qh1 >> 0) & 0xFF]; + tmp1[1] = table_b2b_1[(qh1 >> 8) & 0xFF]; + tmp1[2] = table_b2b_1[(qh1 >> 16) & 0xFF]; + tmp1[3] = table_b2b_1[(qh1 >> 24) ]; + + const int8x16_t qhl0 = vld1q_s8((const int8_t *)(tmp0 + 0)); + const int8x16_t qhh0 = vld1q_s8((const int8_t *)(tmp0 + 2)); + const int8x16_t qhl1 = vld1q_s8((const int8_t *)(tmp1 + 0)); + const int8x16_t qhh1 = vld1q_s8((const int8_t *)(tmp1 + 2)); + + const uint8x16_t v0_0 = vld1q_u8(x0->qs); + const uint8x16_t v0_1 = vld1q_u8(x1->qs); + + // 4-bit -> 8-bit + int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); + int8x16_t v0_0h = vreinterpretq_s8_u8(vshrq_n_u8(v0_0, 4)); + int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); + int8x16_t v0_1h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4)); + + // add high bit and sub 16 (equivalent to sub 0x10 when bit is zero) + const int8x16_t v0_0lf = vsubq_s8(v0_0l, qhl0); + const int8x16_t v0_0hf = vsubq_s8(v0_0h, qhh0); + const int8x16_t v0_1lf = vsubq_s8(v0_1l, qhl1); + const int8x16_t v0_1hf = vsubq_s8(v0_1h, qhh1); + + // load y + const int8x16_t v1_0l = vld1q_s8(y0->qs); + const int8x16_t v1_0h = vld1q_s8(y0->qs + 16); + const int8x16_t v1_1l = vld1q_s8(y1->qs); + const int8x16_t v1_1h = vld1q_s8(y1->qs + 16); + +#if defined(__ARM_FEATURE_DOTPROD) + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32( + vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l), + vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32( + vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l), + vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); +#else + const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0lf), vget_low_s8 (v1_0l)); + const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0lf), vget_high_s8(v1_0l)); + const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hf), vget_low_s8 (v1_0h)); + const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0hf), vget_high_s8(v1_0h)); + + const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1lf), vget_low_s8 (v1_1l)); + const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1lf), vget_high_s8(v1_1l)); + const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1hf), vget_low_s8 (v1_1h)); + const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1hf), vget_high_s8(v1_1h)); + + const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h)); + const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h)); + const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h)); + const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h)); + + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); +#endif + } + + *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); +#elif defined(__wasm_simd128__) + v128_t sumv = wasm_f32x4_splat(0.0f); + + uint32_t qh; + uint64_t tmp[4]; + + // TODO: check if unrolling this is better + for (int i = 0; i < nb; ++i) { + const block_q5_0 * restrict x0 = &x[i]; + const block_q8_0 * restrict y0 = &y[i]; + + const v128_t m4b = wasm_i8x16_splat(0x0F); + + // extract the 5th bit + memcpy(&qh, x0->qh, sizeof(qh)); + + tmp[0] = table_b2b_1[(qh >> 0) & 0xFF]; + tmp[1] = table_b2b_1[(qh >> 8) & 0xFF]; + tmp[2] = table_b2b_1[(qh >> 16) & 0xFF]; + tmp[3] = table_b2b_1[(qh >> 24) ]; + + const v128_t qhl = wasm_v128_load(tmp + 0); + const v128_t qhh = wasm_v128_load(tmp + 2); + + const v128_t v0 = wasm_v128_load(x0->qs); + + // 4-bit -> 8-bit + const v128_t v0l = wasm_v128_and (v0, m4b); + const v128_t v0h = wasm_u8x16_shr(v0, 4); + + // add high bit and sub 16 (equivalent to sub 0x10 when bit is zero) + const v128_t v0lf = wasm_i8x16_sub(v0l, qhl); + const v128_t v0hf = wasm_i8x16_sub(v0h, qhh); + + // load y + const v128_t v1l = wasm_v128_load(y0->qs); + const v128_t v1h = wasm_v128_load(y0->qs + 16); + + // int8x16 -> int16x8 + const v128_t v0lfl = wasm_i16x8_extend_low_i8x16 (v0lf); + const v128_t v0lfh = wasm_i16x8_extend_high_i8x16(v0lf); + const v128_t v0hfl = wasm_i16x8_extend_low_i8x16 (v0hf); + const v128_t v0hfh = wasm_i16x8_extend_high_i8x16(v0hf); + + const v128_t v1ll = wasm_i16x8_extend_low_i8x16 (v1l); + const v128_t v1lh = wasm_i16x8_extend_high_i8x16(v1l); + const v128_t v1hl = wasm_i16x8_extend_low_i8x16 (v1h); + const v128_t v1hh = wasm_i16x8_extend_high_i8x16(v1h); + + // dot product + sumv = wasm_f32x4_add(sumv, wasm_f32x4_mul(wasm_f32x4_convert_i32x4( + wasm_i32x4_add( + wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0lfl, v1ll), + wasm_i32x4_dot_i16x8(v0lfh, v1lh)), + wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl), + wasm_i32x4_dot_i16x8(v0hfh, v1hh)))), + wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d)))); + } + + *s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) + + wasm_f32x4_extract_lane(sumv, 2) + wasm_f32x4_extract_lane(sumv, 3); +#elif defined(__AVX2__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + + // Main loop + for (int i = 0; i < nb; i++) { + /* Compute combined scale for the block */ + const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d)); + + __m256i bx = bytes_from_nibbles_32(x[i].qs); + __m256i bxhi = bytes_from_bits_32(x[i].qh); + bxhi = _mm256_andnot_si256(bxhi, _mm256_set1_epi8((char)0xF0)); + bx = _mm256_or_si256(bx, bxhi); + + __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs); + + const __m256 q = mul_sum_i8_pairs_float(bx, by); + + /* Multiply q with scale and accumulate */ + acc = _mm256_fmadd_ps(d, q, acc); + } + + *s = hsum_float_8(acc); +#elif defined(__AVX__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + __m128i mask = _mm_set1_epi8((char)0xF0); + + // Main loop + for (int i = 0; i < nb; i++) { + /* Compute combined scale for the block */ + const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d)); + + __m256i bx = bytes_from_nibbles_32(x[i].qs); + const __m256i bxhi = bytes_from_bits_32(x[i].qh); + __m128i bxhil = _mm256_castsi256_si128(bxhi); + __m128i bxhih = _mm256_extractf128_si256(bxhi, 1); + bxhil = _mm_andnot_si128(bxhil, mask); + bxhih = _mm_andnot_si128(bxhih, mask); + __m128i bxl = _mm256_castsi256_si128(bx); + __m128i bxh = _mm256_extractf128_si256(bx, 1); + bxl = _mm_or_si128(bxl, bxhil); + bxh = _mm_or_si128(bxh, bxhih); + bx = MM256_SET_M128I(bxh, bxl); + + const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs); + + const __m256 q = mul_sum_i8_pairs_float(bx, by); + + /* Multiply q with scale and accumulate */ + acc = _mm256_add_ps(_mm256_mul_ps(d, q), acc); + } + + *s = hsum_float_8(acc); +#else + // scalar + float sumf = 0.0; + + for (int i = 0; i < nb; i++) { + uint32_t qh; + memcpy(&qh, x[i].qh, sizeof(qh)); + + int sumi = 0; + + for (int j = 0; j < qk/2; ++j) { + const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4; + const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12)); + + const int32_t x0 = ((x[i].qs[j] & 0x0F) | xh_0) - 16; + const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16; + + sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]); + } + + sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d)) * sumi; + } + + *s = sumf; +#endif +} + +static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + const int qk = QK8_1; + const int nb = n / qk; + + assert(n % qk == 0); + assert(nb % 2 == 0); + assert(qk == QK5_1); + + const block_q5_1 * restrict x = vx; + const block_q8_1 * restrict y = vy; + +#if defined(__ARM_NEON) + float32x4_t sumv0 = vdupq_n_f32(0.0f); + float32x4_t sumv1 = vdupq_n_f32(0.0f); + + float summs0 = 0.0f; + float summs1 = 0.0f; + + uint32_t qh0; + uint32_t qh1; + + uint64_t tmp0[4]; + uint64_t tmp1[4]; + + for (int i = 0; i < nb; i += 2) { + const block_q5_1 * restrict x0 = &x[i]; + const block_q5_1 * restrict x1 = &x[i + 1]; + const block_q8_1 * restrict y0 = &y[i]; + const block_q8_1 * restrict y1 = &y[i + 1]; + + const uint8x16_t m4b = vdupq_n_u8(0x0F); + + summs0 += GGML_FP16_TO_FP32(x0->m) * y0->s; + summs1 += GGML_FP16_TO_FP32(x1->m) * y1->s; + + // extract the 5th bit via lookup table ((b) << 4) + memcpy(&qh0, x0->qh, sizeof(qh0)); + memcpy(&qh1, x1->qh, sizeof(qh1)); + + tmp0[0] = table_b2b_0[(qh0 >> 0) & 0xFF]; + tmp0[1] = table_b2b_0[(qh0 >> 8) & 0xFF]; + tmp0[2] = table_b2b_0[(qh0 >> 16) & 0xFF]; + tmp0[3] = table_b2b_0[(qh0 >> 24) ]; + + tmp1[0] = table_b2b_0[(qh1 >> 0) & 0xFF]; + tmp1[1] = table_b2b_0[(qh1 >> 8) & 0xFF]; + tmp1[2] = table_b2b_0[(qh1 >> 16) & 0xFF]; + tmp1[3] = table_b2b_0[(qh1 >> 24) ]; + + const int8x16_t qhl0 = vld1q_s8((const int8_t *)(tmp0 + 0)); + const int8x16_t qhh0 = vld1q_s8((const int8_t *)(tmp0 + 2)); + const int8x16_t qhl1 = vld1q_s8((const int8_t *)(tmp1 + 0)); + const int8x16_t qhh1 = vld1q_s8((const int8_t *)(tmp1 + 2)); + + const uint8x16_t v0_0 = vld1q_u8(x0->qs); + const uint8x16_t v0_1 = vld1q_u8(x1->qs); + + // 4-bit -> 8-bit + const int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); + const int8x16_t v0_0h = vreinterpretq_s8_u8(vshrq_n_u8(v0_0, 4)); + const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); + const int8x16_t v0_1h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4)); + + // add high bit + const int8x16_t v0_0lf = vorrq_s8(v0_0l, qhl0); + const int8x16_t v0_0hf = vorrq_s8(v0_0h, qhh0); + const int8x16_t v0_1lf = vorrq_s8(v0_1l, qhl1); + const int8x16_t v0_1hf = vorrq_s8(v0_1h, qhh1); + + // load y + const int8x16_t v1_0l = vld1q_s8(y0->qs); + const int8x16_t v1_0h = vld1q_s8(y0->qs + 16); + const int8x16_t v1_1l = vld1q_s8(y1->qs); + const int8x16_t v1_1h = vld1q_s8(y1->qs + 16); + +#if defined(__ARM_FEATURE_DOTPROD) + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32( + vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l), + vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*y0->d); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32( + vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l), + vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*y1->d); +#else + const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0lf), vget_low_s8 (v1_0l)); + const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0lf), vget_high_s8(v1_0l)); + const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hf), vget_low_s8 (v1_0h)); + const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0hf), vget_high_s8(v1_0h)); + + const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1lf), vget_low_s8 (v1_1l)); + const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1lf), vget_high_s8(v1_1l)); + const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1hf), vget_low_s8 (v1_1h)); + const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1hf), vget_high_s8(v1_1h)); + + const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h)); + const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h)); + const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h)); + const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h)); + + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), GGML_FP16_TO_FP32(x0->d)*y0->d); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), GGML_FP16_TO_FP32(x1->d)*y1->d); +#endif + } + + *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1; +#elif defined(__wasm_simd128__) + v128_t sumv = wasm_f32x4_splat(0.0f); + + float summs = 0.0f; + + uint32_t qh; + uint64_t tmp[4]; + + // TODO: check if unrolling this is better + for (int i = 0; i < nb; ++i) { + const block_q5_1 * restrict x0 = &x[i]; + const block_q8_1 * restrict y0 = &y[i]; + + summs += GGML_FP16_TO_FP32(x0->m) * y0->s; + + const v128_t m4b = wasm_i8x16_splat(0x0F); + + // extract the 5th bit + memcpy(&qh, x0->qh, sizeof(qh)); + + tmp[0] = table_b2b_0[(qh >> 0) & 0xFF]; + tmp[1] = table_b2b_0[(qh >> 8) & 0xFF]; + tmp[2] = table_b2b_0[(qh >> 16) & 0xFF]; + tmp[3] = table_b2b_0[(qh >> 24) ]; + + const v128_t qhl = wasm_v128_load(tmp + 0); + const v128_t qhh = wasm_v128_load(tmp + 2); + + const v128_t v0 = wasm_v128_load(x0->qs); + + // 4-bit -> 8-bit + const v128_t v0l = wasm_v128_and (v0, m4b); + const v128_t v0h = wasm_u8x16_shr(v0, 4); + + // add high bit + const v128_t v0lf = wasm_v128_or(v0l, qhl); + const v128_t v0hf = wasm_v128_or(v0h, qhh); + + // load y + const v128_t v1l = wasm_v128_load(y0->qs); + const v128_t v1h = wasm_v128_load(y0->qs + 16); + + // int8x16 -> int16x8 + const v128_t v0lfl = wasm_i16x8_extend_low_i8x16 (v0lf); + const v128_t v0lfh = wasm_i16x8_extend_high_i8x16(v0lf); + const v128_t v0hfl = wasm_i16x8_extend_low_i8x16 (v0hf); + const v128_t v0hfh = wasm_i16x8_extend_high_i8x16(v0hf); + + const v128_t v1ll = wasm_i16x8_extend_low_i8x16 (v1l); + const v128_t v1lh = wasm_i16x8_extend_high_i8x16(v1l); + const v128_t v1hl = wasm_i16x8_extend_low_i8x16 (v1h); + const v128_t v1hh = wasm_i16x8_extend_high_i8x16(v1h); + + // dot product + sumv = wasm_f32x4_add(sumv, + wasm_f32x4_mul(wasm_f32x4_convert_i32x4(wasm_i32x4_add( + wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0lfl, v1ll), + wasm_i32x4_dot_i16x8(v0lfh, v1lh)), + wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl), + wasm_i32x4_dot_i16x8(v0hfh, v1hh)))), + wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * y0->d))); + } + + *s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) + + wasm_f32x4_extract_lane(sumv, 2) + wasm_f32x4_extract_lane(sumv, 3) + summs; +#elif defined(__AVX2__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + + float summs = 0.0f; + + // Main loop + for (int i = 0; i < nb; i++) { + const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d)); + + summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s; + + __m256i bx = bytes_from_nibbles_32(x[i].qs); + __m256i bxhi = bytes_from_bits_32(x[i].qh); + bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10)); + bx = _mm256_or_si256(bx, bxhi); + + const __m256 dy = _mm256_set1_ps(y[i].d); + const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs); + + const __m256 q = mul_sum_us8_pairs_float(bx, by); + + acc = _mm256_fmadd_ps(q, _mm256_mul_ps(dx, dy), acc); + } + + *s = hsum_float_8(acc) + summs; +#elif defined(__AVX__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + __m128i mask = _mm_set1_epi8(0x10); + + float summs = 0.0f; + + // Main loop + for (int i = 0; i < nb; i++) { + const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d)); + + summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s; + + __m256i bx = bytes_from_nibbles_32(x[i].qs); + const __m256i bxhi = bytes_from_bits_32(x[i].qh); + __m128i bxhil = _mm256_castsi256_si128(bxhi); + __m128i bxhih = _mm256_extractf128_si256(bxhi, 1); + bxhil = _mm_and_si128(bxhil, mask); + bxhih = _mm_and_si128(bxhih, mask); + __m128i bxl = _mm256_castsi256_si128(bx); + __m128i bxh = _mm256_extractf128_si256(bx, 1); + bxl = _mm_or_si128(bxl, bxhil); + bxh = _mm_or_si128(bxh, bxhih); + bx = MM256_SET_M128I(bxh, bxl); + + const __m256 dy = _mm256_set1_ps(y[i].d); + const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs); + + const __m256 q = mul_sum_us8_pairs_float(bx, by); + + acc = _mm256_add_ps(_mm256_mul_ps(q, _mm256_mul_ps(dx, dy)), acc); + } + + *s = hsum_float_8(acc) + summs; +#else + // scalar + float sumf = 0.0; + + for (int i = 0; i < nb; i++) { + uint32_t qh; + memcpy(&qh, x[i].qh, sizeof(qh)); + + int sumi = 0; + + for (int j = 0; j < qk/2; ++j) { + const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10; + const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10; + + const int32_t x0 = (x[i].qs[j] & 0xF) | xh_0; + const int32_t x1 = (x[i].qs[j] >> 4) | xh_1; + + sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]); + } + + sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s; + } + + *s = sumf; +#endif +} + +static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + const int qk = QK8_0; + const int nb = n / qk; + + assert(n % qk == 0); + assert(nb % 2 == 0); + + const block_q8_0 * restrict x = vx; + const block_q8_0 * restrict y = vy; + +#if defined(__ARM_NEON) + float32x4_t sumv0 = vdupq_n_f32(0.0f); + float32x4_t sumv1 = vdupq_n_f32(0.0f); + + for (int i = 0; i < nb; i += 2) { + const block_q8_0 * restrict x0 = &x[i + 0]; + const block_q8_0 * restrict x1 = &x[i + 1]; + const block_q8_0 * restrict y0 = &y[i + 0]; + const block_q8_0 * restrict y1 = &y[i + 1]; + + const int8x16_t x0_0 = vld1q_s8(x0->qs); + const int8x16_t x0_1 = vld1q_s8(x0->qs + 16); + const int8x16_t x1_0 = vld1q_s8(x1->qs); + const int8x16_t x1_1 = vld1q_s8(x1->qs + 16); + + // load y + const int8x16_t y0_0 = vld1q_s8(y0->qs); + const int8x16_t y0_1 = vld1q_s8(y0->qs + 16); + const int8x16_t y1_0 = vld1q_s8(y1->qs); + const int8x16_t y1_1 = vld1q_s8(y1->qs + 16); + +#if defined(__ARM_FEATURE_DOTPROD) + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32( + vdotq_s32(vdupq_n_s32(0), x0_0, y0_0), + vdotq_s32(vdupq_n_s32(0), x0_1, y0_1))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32( + vdotq_s32(vdupq_n_s32(0), x1_0, y1_0), + vdotq_s32(vdupq_n_s32(0), x1_1, y1_1))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + +#else + const int16x8_t p0_0 = vmull_s8(vget_low_s8 (x0_0), vget_low_s8 (y0_0)); + const int16x8_t p0_1 = vmull_s8(vget_high_s8(x0_0), vget_high_s8(y0_0)); + const int16x8_t p0_2 = vmull_s8(vget_low_s8 (x0_1), vget_low_s8 (y0_1)); + const int16x8_t p0_3 = vmull_s8(vget_high_s8(x0_1), vget_high_s8(y0_1)); + + const int16x8_t p1_0 = vmull_s8(vget_low_s8 (x1_0), vget_low_s8 (y1_0)); + const int16x8_t p1_1 = vmull_s8(vget_high_s8(x1_0), vget_high_s8(y1_0)); + const int16x8_t p1_2 = vmull_s8(vget_low_s8 (x1_1), vget_low_s8 (y1_1)); + const int16x8_t p1_3 = vmull_s8(vget_high_s8(x1_1), vget_high_s8(y1_1)); + + const int32x4_t p0 = vaddq_s32(vpaddlq_s16(p0_0), vpaddlq_s16(p0_1)); + const int32x4_t p1 = vaddq_s32(vpaddlq_s16(p0_2), vpaddlq_s16(p0_3)); + const int32x4_t p2 = vaddq_s32(vpaddlq_s16(p1_0), vpaddlq_s16(p1_1)); + const int32x4_t p3 = vaddq_s32(vpaddlq_s16(p1_2), vpaddlq_s16(p1_3)); + + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(p0, p1)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(p2, p3)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); +#endif + } + + *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); +#elif defined(__AVX2__) || defined(__AVX__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + + // Main loop + for (int i = 0; i < nb; ++i) { + // Compute combined scale for the block + const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d)); + __m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs); + __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs); + + const __m256 q = mul_sum_i8_pairs_float(bx, by); + + // Multiply q with scale and accumulate +#if defined(__AVX2__) + acc = _mm256_fmadd_ps( d, q, acc ); +#else + acc = _mm256_add_ps( _mm256_mul_ps( d, q ), acc ); +#endif + } + + *s = hsum_float_8(acc); +#else + // scalar + float sumf = 0.0; + + for (int i = 0; i < nb; i++) { + int sumi = 0; + + for (int j = 0; j < qk; j++) { + sumi += x[i].qs[j]*y[i].qs[j]; + } + + sumf += sumi*(GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d)); + } + + *s = sumf; +#endif +} + +// compute GGML_VEC_DOT_UNROLL dot products at once +// xs - x row stride in bytes +inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * restrict s, void * restrict xv, ggml_fp16_t * restrict y) { + ggml_float sumf[GGML_VEC_DOT_UNROLL] = { 0.0 }; + + ggml_fp16_t * restrict x[GGML_VEC_DOT_UNROLL]; + + for (int i = 0; i < GGML_VEC_DOT_UNROLL; ++i) { + x[i] = (ggml_fp16_t *) ((char *) xv + i*xs); + } + +#if defined(GGML_SIMD) + const int np = (n & ~(GGML_F16_STEP - 1)); + + GGML_F16_VEC sum[GGML_VEC_DOT_UNROLL][GGML_F16_ARR] = { { GGML_F16_VEC_ZERO } }; + + GGML_F16_VEC ax[GGML_F16_ARR]; + GGML_F16_VEC ay[GGML_F16_ARR]; + + for (int i = 0; i < np; i += GGML_F16_STEP) { + for (int j = 0; j < GGML_F16_ARR; j++) { + ay[j] = GGML_F16_VEC_LOAD(y + i + j*GGML_F16_EPR, j); + + for (int k = 0; k < GGML_VEC_DOT_UNROLL; ++k) { + ax[j] = GGML_F16_VEC_LOAD(x[k] + i + j*GGML_F16_EPR, j); + + sum[k][j] = GGML_F16_VEC_FMA(sum[k][j], ax[j], ay[j]); + } + } + } + + // reduce sum0..sum3 to sum0 + for (int k = 0; k < GGML_VEC_DOT_UNROLL; ++k) { + GGML_F16_VEC_REDUCE(sumf[k], sum[k]); + } + + // leftovers + for (int i = np; i < n; ++i) { + for (int j = 0; j < GGML_VEC_DOT_UNROLL; ++j) { + sumf[j] += (ggml_float)(GGML_FP16_TO_FP32(x[j][i])*GGML_FP16_TO_FP32(y[i])); + } + } +#else + for (int i = 0; i < n; ++i) { + for (int j = 0; j < GGML_VEC_DOT_UNROLL; ++j) { + sumf[j] += (ggml_float)(GGML_FP16_TO_FP32(x[j][i])*GGML_FP16_TO_FP32(y[i])); + } + } +#endif + + for (int i = 0; i < GGML_VEC_DOT_UNROLL; ++i) { + s[i] = sumf[i]; + } +} + +inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) { +#if defined(GGML_SIMD) + const int np = (n & ~(GGML_F32_STEP - 1)); + + GGML_F32_VEC vx = GGML_F32_VEC_SET1(v); + + GGML_F32_VEC ax[GGML_F32_ARR]; + GGML_F32_VEC ay[GGML_F32_ARR]; + + for (int i = 0; i < np; i += GGML_F32_STEP) { + for (int j = 0; j < GGML_F32_ARR; j++) { + ax[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR); + ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR); + ay[j] = GGML_F32_VEC_FMA(ay[j], ax[j], vx); + + GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]); + } + } + + // leftovers + for (int i = np; i < n; ++i) { + y[i] += x[i]*v; + } +#else + // scalar + for (int i = 0; i < n; ++i) { + y[i] += x[i]*v; + } +#endif +} + +//inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; } +inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { +#if defined(GGML_SIMD) + const int np = (n & ~(GGML_F32_STEP - 1)); + + GGML_F32_VEC vx = GGML_F32_VEC_SET1(v); + + GGML_F32_VEC ay[GGML_F32_ARR]; + + for (int i = 0; i < np; i += GGML_F32_STEP) { + for (int j = 0; j < GGML_F32_ARR; j++) { + ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR); + ay[j] = GGML_F32_VEC_MUL(ay[j], vx); + + GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]); + } + } + + // leftovers + for (int i = np; i < n; ++i) { + y[i] *= v; + } +#else + // scalar + for (int i = 0; i < n; ++i) { + y[i] *= v; + } +#endif +} + +inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s); } +inline static void ggml_vec_sqr_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; } +inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); } +inline static void ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); } +inline static void ggml_vec_abs_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); } +inline static void ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); } +inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; } +inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; } + +static const float GELU_COEF_A = 0.044715f; +static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f; + +inline static float ggml_gelu_f32(float x) { + return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x))); +} + +inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { + const uint16_t * i16 = (const uint16_t *) x; + for (int i = 0; i < n; ++i) { + y[i] = table_gelu_f16[i16[i]]; + } +} + +#ifdef GGML_GELU_FP16 +inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) { + uint16_t t; + for (int i = 0; i < n; ++i) { + ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]); + memcpy(&t, &fp16, sizeof(uint16_t)); + y[i] = GGML_FP16_TO_FP32(table_gelu_f16[t]); + } +} +#else +inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) { + for (int i = 0; i < n; ++i) { + y[i] = ggml_gelu_f32(x[i]); + } +} +#endif + +// Sigmoid Linear Unit (SiLU) function +inline static float ggml_silu_f32(float x) { + return x/(1.0f + expf(-x)); +} + +//inline static void ggml_vec_silu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { +// const uint16_t * i16 = (const uint16_t *) x; +// for (int i = 0; i < n; ++i) { +// y[i] = table_silu_f16[i16[i]]; +// } +//} + +#ifdef GGML_SILU_FP16 +inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) { + uint16_t t; + for (int i = 0; i < n; ++i) { + ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]); + memcpy(&t, &fp16, sizeof(uint16_t)); + y[i] = GGML_FP16_TO_FP32(table_silu_f16[t]); + } +} +#else +inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) { + for (int i = 0; i < n; ++i) { + y[i] = ggml_silu_f32(x[i]); + } +} +#endif + +inline static float ggml_silu_backward_f32(float x, float dy) { + const float s = 1.0f/(1.0f + expf(-x)); + return dy*s*(1.0f + x*(1.0f - s)); +} + +#ifdef GGML_SILU_FP16 +inline static void ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) { + for (int i = 0; i < n; ++i) { + // we did not use x[i] to compute forward silu but its f16 equivalent + // take derivative at f16 of x[i]: + ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]); + float usedx = GGML_FP16_TO_FP32(fp16); + dx[i] = ggml_silu_backward_f32(usedx, dy[i]); + } +} +#else +inline static void ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) { + for (int i = 0; i < n; ++i) { + dx[i] = ggml_silu_backward_f32(x[i], dy[i]); + } +} +#endif + +inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) { +#ifndef GGML_USE_ACCELERATE + ggml_float sum = 0.0; + for (int i = 0; i < n; ++i) { + sum += (ggml_float)x[i]; + } + *s = sum; +#else + vDSP_sve(x, 1, s, n); +#endif +} + +inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x) { + ggml_float sum = 0.0; + for (int i = 0; i < n; ++i) { + sum += (ggml_float)x[i]; + } + *s = sum; +} + +inline static void ggml_vec_max_f32(const int n, float * s, const float * x) { +#ifndef GGML_USE_ACCELERATE + float max = -INFINITY; + for (int i = 0; i < n; ++i) { + max = MAX(max, x[i]); + } + *s = max; +#else + vDSP_maxv(x, 1, s, n); +#endif +} + +inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x) { + ggml_vec_norm_f32(n, s, x); + *s = 1.f/(*s); +} + +// +// logging +// + +#if (GGML_DEBUG >= 1) +#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG(...) +#endif + +#if (GGML_DEBUG >= 5) +#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG_5(...) +#endif + +#if (GGML_DEBUG >= 10) +#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__) +#else +#define GGML_PRINT_DEBUG_10(...) +#endif + +#define GGML_PRINT(...) printf(__VA_ARGS__) + +// +// data types +// + +static const int GGML_BLCK_SIZE[GGML_TYPE_COUNT] = { + [GGML_TYPE_F32] = 1, + [GGML_TYPE_F16] = 1, + [GGML_TYPE_Q4_0] = QK4_0, + [GGML_TYPE_Q4_1] = QK4_1, + [GGML_TYPE_Q5_0] = QK5_0, + [GGML_TYPE_Q5_1] = QK5_1, + [GGML_TYPE_Q8_0] = QK8_0, + [GGML_TYPE_Q8_1] = QK8_1, +#ifdef GGML_USE_K_QUANTS + [GGML_TYPE_Q2_K] = QK_K, + [GGML_TYPE_Q3_K] = QK_K, + [GGML_TYPE_Q4_K] = QK_K, + [GGML_TYPE_Q5_K] = QK_K, + [GGML_TYPE_Q6_K] = QK_K, + [GGML_TYPE_Q8_K] = QK_K, +#endif + [GGML_TYPE_I8] = 1, + [GGML_TYPE_I16] = 1, + [GGML_TYPE_I32] = 1, +}; +static_assert(GGML_TYPE_COUNT == 19, "GGML_BLCK_SIZE is outdated"); + +static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = { + [GGML_TYPE_F32] = sizeof(float), + [GGML_TYPE_F16] = sizeof(ggml_fp16_t), + [GGML_TYPE_Q4_0] = sizeof(block_q4_0), + [GGML_TYPE_Q4_1] = sizeof(block_q4_1), + [GGML_TYPE_Q5_0] = sizeof(block_q5_0), + [GGML_TYPE_Q5_1] = sizeof(block_q5_1), + [GGML_TYPE_Q8_0] = sizeof(block_q8_0), + [GGML_TYPE_Q8_1] = sizeof(block_q8_1), +#ifdef GGML_USE_K_QUANTS + [GGML_TYPE_Q2_K] = sizeof(block_q2_K), + [GGML_TYPE_Q3_K] = sizeof(block_q3_K), + [GGML_TYPE_Q4_K] = sizeof(block_q4_K), + [GGML_TYPE_Q5_K] = sizeof(block_q5_K), + [GGML_TYPE_Q6_K] = sizeof(block_q6_K), + [GGML_TYPE_Q8_K] = sizeof(block_q8_K), +#endif + [GGML_TYPE_I8] = sizeof(int8_t), + [GGML_TYPE_I16] = sizeof(int16_t), + [GGML_TYPE_I32] = sizeof(int32_t), +}; +static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated"); + + +static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = { + [GGML_TYPE_F32] = "f32", + [GGML_TYPE_F16] = "f16", + [GGML_TYPE_Q4_0] = "q4_0", + [GGML_TYPE_Q4_1] = "q4_1", + [GGML_TYPE_Q5_0] = "q5_0", + [GGML_TYPE_Q5_1] = "q5_1", + [GGML_TYPE_Q8_0] = "q8_0", + [GGML_TYPE_Q8_1] = "q8_1", + [GGML_TYPE_Q2_K] = "q2_K", + [GGML_TYPE_Q3_K] = "q3_K", + [GGML_TYPE_Q4_K] = "q4_K", + [GGML_TYPE_Q5_K] = "q5_K", + [GGML_TYPE_Q6_K] = "q6_K", + [GGML_TYPE_Q8_K] = "q8_K", + [GGML_TYPE_I8] = "i8", + [GGML_TYPE_I16] = "i16", + [GGML_TYPE_I32] = "i32", +}; +static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_NAME is outdated"); + +static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = { + [GGML_TYPE_F32] = false, + [GGML_TYPE_F16] = false, + [GGML_TYPE_Q4_0] = true, + [GGML_TYPE_Q4_1] = true, + [GGML_TYPE_Q5_0] = true, + [GGML_TYPE_Q5_1] = true, + [GGML_TYPE_Q8_0] = true, + [GGML_TYPE_Q8_1] = true, + [GGML_TYPE_Q2_K] = true, + [GGML_TYPE_Q3_K] = true, + [GGML_TYPE_Q4_K] = true, + [GGML_TYPE_Q5_K] = true, + [GGML_TYPE_Q6_K] = true, + [GGML_TYPE_Q8_K] = true, + [GGML_TYPE_I8] = false, + [GGML_TYPE_I16] = false, + [GGML_TYPE_I32] = false, +}; +static_assert(GGML_TYPE_COUNT == 19, "GGML_IS_QUANTIZED is outdated"); + +static const char * GGML_OP_NAME[GGML_OP_COUNT] = { + "NONE", + + "DUP", + "ADD", + "ADD1", + "ACC", + "SUB", + "MUL", + "DIV", + "SQR", + "SQRT", + "LOG", + "SUM", + "SUM_ROWS", + "MEAN", + "REPEAT", + "REPEAT_BACK", + "REPEAT2", + "ABS", + "SGN", + "NEG", + "STEP", + "RELU", + "GELU", + "SILU", + "SILU_BACK", + "NORM", + "RMS_NORM", + "RMS_NORM_BACK", + + "MUL_MAT", + "OUT_PROD", + + "SCALE", + "SET", + "CPY", + "CONT", + "RESHAPE", + "VIEW", + "PERMUTE", + "TRANSPOSE", + "GET_ROWS", + "GET_ROWS_BACK", + "DIAG", + "DIAG_MASK_INF", + "DIAG_MASK_ZERO", + "SOFT_MAX", + "SOFT_MAX_BACK", + "ROPE", + "ROPE_BACK", + "ALIBI", + "CLAMP", + "CONV_1D_1S", + "CONV_1D_2S", + + "FLASH_ATTN", + "FLASH_FF", + "FLASH_ATTN_BACK", + + "MAP_UNARY", + "MAP_BINARY", + + "CROSS_ENTROPY_LOSS", + "CROSS_ENTROPY_LOSS_BACK", +}; + +static_assert(GGML_OP_COUNT == 58, "GGML_OP_COUNT != 58"); + +static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { + "none", + + "x", + "x+y", + "x+y", + "view(x,nb,offset)+=y->x", + "x-y", + "x*y", + "x/y", + "x^2", + "√x", + "log(x)", + "Σx", + "Σx_k", + "Σx/n", + "repeat(x)", + "repeat_back(x)", + "repeat2(x)", + "abs(x)", + "sgn(x)", + "-x", + "step(x)", + "relu(x)", + "gelu(x)", + "silu(x)", + "silu_back(x)", + "norm(x)", + "rms_norm(x)", + "rms_norm_back(x)", + + "X*Y", + "X*Y", + + "x*v", + "y-\\>view(x)", + "x-\\>y", + "cont(x)", + "reshape(x)", + "view(x)", + "permute(x)", + "transpose(x)", + "get_rows(x)", + "get_rows_back(x)", + "diag(x)", + "diag_mask_inf(x)", + "diag_mask_zero(x)", + "soft_max(x)", + "soft_max_back(x)", + "rope(x)", + "rope_back(x)", + "alibi(x)", + "clamp(x)", + "conv_1d_1s(x)", + "conv_1d_2s(x)", + + "flash_attn(x)", + "flash_ff(x)", + "flash_attn_back(x)", + + "f(x)", + "f(x,y)", + + "cross_entropy_loss(x,y)", + "cross_entropy_loss_back(x,y)", +}; + +static_assert(GGML_OP_COUNT == 58, "GGML_OP_COUNT != 58"); + +static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); +static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN"); + +// +// ggml context +// + +struct ggml_context { + size_t mem_size; + void * mem_buffer; + bool mem_buffer_owned; + bool no_alloc; + bool no_alloc_save; // this is used to save the no_alloc state when using scratch buffers + + int n_objects; + + struct ggml_object * objects_begin; + struct ggml_object * objects_end; + + struct ggml_scratch scratch; + struct ggml_scratch scratch_save; +}; + +struct ggml_context_container { + bool used; + + struct ggml_context context; +}; + +// +// ggml state +// + +struct ggml_state { + struct ggml_context_container contexts[GGML_MAX_CONTEXTS]; +}; + +// global state +static struct ggml_state g_state; +static atomic_int g_state_barrier = 0; + +// barrier via spin lock +inline static void ggml_critical_section_start(void) { + int processing = atomic_fetch_add(&g_state_barrier, 1); + + while (processing > 0) { + // wait for other threads to finish + atomic_fetch_sub(&g_state_barrier, 1); + sched_yield(); // TODO: reconsider this + processing = atomic_fetch_add(&g_state_barrier, 1); + } +} + +// TODO: make this somehow automatically executed +// some sort of "sentry" mechanism +inline static void ggml_critical_section_end(void) { + atomic_fetch_sub(&g_state_barrier, 1); +} + +//////////////////////////////////////////////////////////////////////////////// + +void ggml_print_object(const struct ggml_object * obj) { + GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n", + obj->offs, obj->size, (const void *) obj->next); +} + +void ggml_print_objects(const struct ggml_context * ctx) { + struct ggml_object * obj = ctx->objects_begin; + + GGML_PRINT("%s: objects in context %p:\n", __func__, (const void *) ctx); + + while (obj != NULL) { + ggml_print_object(obj); + obj = obj->next; + } + + GGML_PRINT("%s: --- end ---\n", __func__); +} + +int64_t ggml_nelements(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; +} + +int64_t ggml_nrows(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; +} + +size_t ggml_nbytes(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + // this should handle cases where the tensor is not contiguous in memory + // probaby just: + // + // return tensor->ne[3]*tensor->nb[3] + // + // is enough, but just in case, adding the second part + + return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]); +} + +size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return (nrows_split*tensor->ne[0]*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]; +} + +int ggml_blck_size(enum ggml_type type) { + return GGML_BLCK_SIZE[type]; +} + +size_t ggml_type_size(enum ggml_type type) { + return GGML_TYPE_SIZE[type]; +} + +float ggml_type_sizef(enum ggml_type type) { + return ((float)(GGML_TYPE_SIZE[type]))/GGML_BLCK_SIZE[type]; +} + +const char * ggml_type_name(enum ggml_type type) { + return GGML_TYPE_NAME[type]; +} + +const char * ggml_op_name(enum ggml_op op) { + return GGML_OP_NAME[op]; +} + +size_t ggml_element_size(const struct ggml_tensor * tensor) { + return GGML_TYPE_SIZE[tensor->type]; +} + +static inline bool ggml_is_scalar(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1; +} + +static inline bool ggml_is_vector(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1; +} + +static inline bool ggml_is_matrix(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return tensor->ne[2] == 1 && tensor->ne[3] == 1; +} + +static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return + (t0->ne[0] == t1->ne[0]) && + (t0->ne[2] == t1->ne[2]) && + (t0->ne[3] == t1->ne[3]); +} + +static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return + (t0->ne[1] == t1->ne[1]) && + (t0->ne[2] == t1->ne[2]) && + (t0->ne[3] == t1->ne[3]); +} + +bool ggml_is_quantized(enum ggml_type type) { + return GGML_IS_QUANTIZED[type]; +} + +enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) { + enum ggml_type wtype = GGML_TYPE_COUNT; + + switch (ftype) { + case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break; + case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break; + case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break; + case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break; + case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break; + case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break; + case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break; + case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break; + case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break; + case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break; + case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break; + case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break; + case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break; + case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break; + } + + GGML_ASSERT(wtype != GGML_TYPE_COUNT); + + return wtype; +} + +size_t ggml_tensor_overhead(void) { + return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16; +} + +bool ggml_is_transposed(const struct ggml_tensor * tensor) { + return tensor->nb[0] > tensor->nb[1]; +} + +bool ggml_is_contiguous(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return + tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] && + tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/GGML_BLCK_SIZE[tensor->type] && + tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && + tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; +} + +bool ggml_is_permuted(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3]; +} + +static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return + tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] && + tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && + tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; +} + +static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return + (t0->ne[0] == t1->ne[0] ) && + (t0->ne[1] == t1->ne[1] ) && + (t0->ne[2] == t1->ne[2] ) && + (t0->ne[3] == t1->ne[3] ); +} + +// check if t1 can be represented as a repeatition of t0 +static inline bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return + (t1->ne[0]%t0->ne[0] == 0) && + (t1->ne[1]%t0->ne[1] == 0) && + (t1->ne[2]%t0->ne[2] == 0) && + (t1->ne[3]%t0->ne[3] == 0); +} + +static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1); +} + +static inline int ggml_up32(int n) { + return (n + 31) & ~31; +} + +//static inline int ggml_up64(int n) { +// return (n + 63) & ~63; +//} + +static inline int ggml_up(int n, int m) { + // assert m is a power of 2 + GGML_ASSERT((m & (m - 1)) == 0); + return (n + m - 1) & ~(m - 1); +} + +// assert that pointer is aligned to GGML_MEM_ALIGN +#define ggml_assert_aligned(ptr) \ + GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0) + +//////////////////////////////////////////////////////////////////////////////// + +struct ggml_context * ggml_init(struct ggml_init_params params) { + // make this function thread safe + ggml_critical_section_start(); + + static bool is_first_call = true; + + if (is_first_call) { + // initialize time system (required on Windows) + ggml_time_init(); + + // initialize GELU, SILU and EXP F32 tables + { + const uint64_t t_start = ggml_time_us(); UNUSED(t_start); + + ggml_fp16_t ii; + for (int i = 0; i < (1 << 16); ++i) { + uint16_t ui = i; + memcpy(&ii, &ui, sizeof(ii)); + const float f = table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii); + table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f)); + table_silu_f16[i] = GGML_FP32_TO_FP16(ggml_silu_f32(f)); + table_exp_f16[i] = GGML_FP32_TO_FP16(expf(f)); + } + + const uint64_t t_end = ggml_time_us(); UNUSED(t_end); + + GGML_PRINT_DEBUG("%s: GELU, SILU and EXP tables initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f); + } + + // initialize g_state + { + const uint64_t t_start = ggml_time_us(); UNUSED(t_start); + + g_state = (struct ggml_state) { + /*.contexts =*/ { { 0 } }, + }; + + for (int i = 0; i < GGML_MAX_CONTEXTS; ++i) { + g_state.contexts[i].used = false; + } + + const uint64_t t_end = ggml_time_us(); UNUSED(t_end); + + GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f); + } + +#if defined(GGML_USE_CUBLAS) + ggml_init_cublas(); +#elif defined(GGML_USE_CLBLAST) + ggml_cl_init(); +#endif + + is_first_call = false; + } + + // find non-used context in g_state + struct ggml_context * ctx = NULL; + + for (int i = 0; i < GGML_MAX_CONTEXTS; i++) { + if (!g_state.contexts[i].used) { + g_state.contexts[i].used = true; + ctx = &g_state.contexts[i].context; + + GGML_PRINT_DEBUG("%s: found unused context %d\n", __func__, i); + break; + } + } + + if (ctx == NULL) { + GGML_PRINT_DEBUG("%s: no unused context found\n", __func__); + + ggml_critical_section_end(); + + return NULL; + } + + const size_t mem_size = (params.mem_size + GGML_MEM_ALIGN - 1) & ~(GGML_MEM_ALIGN - 1); + + *ctx = (struct ggml_context) { + /*.mem_size =*/ mem_size, + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size), + /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, + /*.no_alloc =*/ params.no_alloc, + /*.no_alloc_save =*/ params.no_alloc, + /*.n_objects =*/ 0, + /*.objects_begin =*/ NULL, + /*.objects_end =*/ NULL, + /*.scratch =*/ { 0, 0, NULL, }, + /*.scratch_save =*/ { 0, 0, NULL, }, + }; + + GGML_ASSERT(ctx->mem_buffer != NULL); + + ggml_assert_aligned(ctx->mem_buffer); + + GGML_PRINT_DEBUG("%s: context initialized\n", __func__); + + ggml_critical_section_end(); + + return ctx; +} + +void ggml_free(struct ggml_context * ctx) { + // make this function thread safe + ggml_critical_section_start(); + + bool found = false; + + for (int i = 0; i < GGML_MAX_CONTEXTS; i++) { + if (&g_state.contexts[i].context == ctx) { + g_state.contexts[i].used = false; + + GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n", + __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size); + + if (ctx->mem_buffer_owned) { + GGML_ALIGNED_FREE(ctx->mem_buffer); + } + + found = true; + break; + } + } + + if (!found) { + GGML_PRINT_DEBUG("%s: context not found\n", __func__); + } + + ggml_critical_section_end(); +} + +size_t ggml_used_mem(const struct ggml_context * ctx) { + return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size; +} + +size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) { + const size_t result = ctx->scratch.data ? ctx->scratch.offs : 0; + + ctx->scratch = scratch; + + return result; +} + +void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) { + ctx->no_alloc = no_alloc; +} + +void * ggml_get_mem_buffer(const struct ggml_context * ctx) { + return ctx->mem_buffer; +} + +size_t ggml_get_mem_size(const struct ggml_context * ctx) { + return ctx->mem_size; +} + +size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { + size_t max_size = 0; + + struct ggml_object * obj = ctx->objects_begin; + + while (obj != NULL) { + struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs); + + const size_t size = ggml_nbytes(tensor); + + if (max_size < size) { + max_size = size; + } + + obj = obj->next; + } + + return max_size; +} + +// IMPORTANT: +// when creating "opt" tensors, always save and load the scratch buffer +// this is an error prone process, but it is necessary to support inplace +// operators when using scratch buffers +// TODO: implement a better way +void ggml_scratch_save(struct ggml_context * ctx) { + // this is needed to allow opt tensors to store their data + // TODO: again, need to find a better way + ctx->no_alloc_save = ctx->no_alloc; + ctx->no_alloc = false; + + ctx->scratch_save = ctx->scratch; + ctx->scratch.data = NULL; +} + +void ggml_scratch_load(struct ggml_context * ctx) { + ctx->no_alloc = ctx->no_alloc_save; + + ctx->scratch = ctx->scratch_save; +} + +//////////////////////////////////////////////////////////////////////////////// + +struct ggml_tensor * ggml_new_tensor_impl( + struct ggml_context * ctx, + enum ggml_type type, + int n_dims, + const int64_t* ne, + void* data) { + // always insert objects at the end of the context's memory pool + struct ggml_object * obj_cur = ctx->objects_end; + + const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs; + const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size; + const size_t cur_end = cur_offs + cur_size; + + size_t size_needed = 0; + + if (data == NULL && !ctx->no_alloc) { + size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]); + for (int i = 1; i < n_dims; i++) { + size_needed *= ne[i]; + } + // align to GGML_MEM_ALIGN + size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN; + } + + char * const mem_buffer = ctx->mem_buffer; + struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end); + + if (ctx->scratch.data == NULL || data != NULL) { + size_needed += GGML_TENSOR_SIZE; + + if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) { + GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n", + __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size); + assert(false); + return NULL; + } + + *obj_new = (struct ggml_object) { + .offs = cur_end + GGML_OBJECT_SIZE, + .size = size_needed, + .next = NULL, + }; + } else { + if (ctx->scratch.offs + size_needed > ctx->scratch.size) { + GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n", + __func__, ctx->scratch.offs + size_needed, ctx->scratch.size); + assert(false); + return NULL; + } + + if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) { + GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n", + __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size); + assert(false); + return NULL; + } + + data = (char * const) ctx->scratch.data + ctx->scratch.offs; + + *obj_new = (struct ggml_object) { + .offs = cur_end + GGML_OBJECT_SIZE, + .size = GGML_TENSOR_SIZE, + .next = NULL, + }; + + //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed); + + ctx->scratch.offs += size_needed; + } + + if (obj_cur != NULL) { + obj_cur->next = obj_new; + } else { + // this is the first object in this context + ctx->objects_begin = obj_new; + } + + ctx->objects_end = obj_new; + + //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size); + + struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs); + + ggml_assert_aligned(result); + + *result = (struct ggml_tensor) { + /*.type =*/ type, + /*.backend =*/ GGML_BACKEND_CPU, + /*.n_dims =*/ n_dims, + /*.ne =*/ { 1, 1, 1, 1 }, + /*.nb =*/ { 0, 0, 0, 0 }, + /*.op =*/ GGML_OP_NONE, + /*.is_param =*/ false, + /*.grad =*/ NULL, + /*.src0 =*/ NULL, + /*.src1 =*/ NULL, + /*.opt =*/ { NULL }, + /*.n_tasks =*/ 0, + /*.perf_runs =*/ 0, + /*.perf_cycles =*/ 0, + /*.perf_time_us =*/ 0, + /*.data =*/ (data == NULL && !ctx->no_alloc) ? (void *)(result + 1) : data, + /*.name =*/ { 0 }, + /*.extra =*/ NULL, + /*.pad =*/ { 0 }, + }; + + // TODO: this should not be needed as long as we don't rely on aligned SIMD loads + //ggml_assert_aligned(result->data); + + for (int i = 0; i < n_dims; i++) { + result->ne[i] = ne[i]; + } + + result->nb[0] = GGML_TYPE_SIZE[type]; + result->nb[1] = result->nb[0]*(result->ne[0]/GGML_BLCK_SIZE[type]); + for (int i = 2; i < GGML_MAX_DIMS; i++) { + result->nb[i] = result->nb[i - 1]*result->ne[i - 1]; + } + + ctx->n_objects++; + + return result; +} + +struct ggml_tensor * ggml_new_tensor( + struct ggml_context * ctx, + enum ggml_type type, + int n_dims, + const int64_t * ne) { + return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL); +} + +struct ggml_tensor * ggml_new_tensor_1d( + struct ggml_context * ctx, + enum ggml_type type, + int64_t ne0) { + return ggml_new_tensor(ctx, type, 1, &ne0); +} + +struct ggml_tensor * ggml_new_tensor_2d( + struct ggml_context * ctx, + enum ggml_type type, + int64_t ne0, + int64_t ne1) { + const int64_t ne[2] = { ne0, ne1 }; + return ggml_new_tensor(ctx, type, 2, ne); +} + +struct ggml_tensor * ggml_new_tensor_3d( + struct ggml_context * ctx, + enum ggml_type type, + int64_t ne0, + int64_t ne1, + int64_t ne2) { + const int64_t ne[3] = { ne0, ne1, ne2 }; + return ggml_new_tensor(ctx, type, 3, ne); +} + +struct ggml_tensor * ggml_new_tensor_4d( + struct ggml_context * ctx, + enum ggml_type type, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3) { + const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; + return ggml_new_tensor(ctx, type, 4, ne); +} + +struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) { + ggml_scratch_save(ctx); + + struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1); + + ggml_scratch_load(ctx); + + ggml_set_i32(result, value); + + return result; +} + +struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) { + ggml_scratch_save(ctx); + + struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); + + ggml_scratch_load(ctx); + + ggml_set_f32(result, value); + + return result; +} + +struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) { + return ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, NULL); +} + +struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) { + memset(tensor->data, 0, ggml_nbytes(tensor)); + return tensor; +} + +struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) { + const int n = ggml_nrows(tensor); + const int nc = tensor->ne[0]; + const size_t n1 = tensor->nb[1]; + + char * const data = tensor->data; + + switch (tensor->type) { + case GGML_TYPE_I8: + { + assert(tensor->nb[0] == sizeof(int8_t)); + for (int i = 0; i < n; i++) { + ggml_vec_set_i8(nc, (int8_t *)(data + i*n1), value); + } + } break; + case GGML_TYPE_I16: + { + assert(tensor->nb[0] == sizeof(int16_t)); + for (int i = 0; i < n; i++) { + ggml_vec_set_i16(nc, (int16_t *)(data + i*n1), value); + } + } break; + case GGML_TYPE_I32: + { + assert(tensor->nb[0] == sizeof(int32_t)); + for (int i = 0; i < n; i++) { + ggml_vec_set_i32(nc, (int32_t *)(data + i*n1), value); + } + } break; + case GGML_TYPE_F16: + { + assert(tensor->nb[0] == sizeof(ggml_fp16_t)); + for (int i = 0; i < n; i++) { + ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), value); + } + } break; + case GGML_TYPE_F32: + { + assert(tensor->nb[0] == sizeof(float)); + for (int i = 0; i < n; i++) { + ggml_vec_set_f32(nc, (float *)(data + i*n1), value); + } + } break; + default: + { + GGML_ASSERT(false); + } break; + } + + return tensor; +} + +struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) { + const int n = ggml_nrows(tensor); + const int nc = tensor->ne[0]; + const size_t n1 = tensor->nb[1]; + + char * const data = tensor->data; + + switch (tensor->type) { + case GGML_TYPE_I8: + { + assert(tensor->nb[0] == sizeof(int8_t)); + for (int i = 0; i < n; i++) { + ggml_vec_set_i8(nc, (int8_t *)(data + i*n1), value); + } + } break; + case GGML_TYPE_I16: + { + assert(tensor->nb[0] == sizeof(int16_t)); + for (int i = 0; i < n; i++) { + ggml_vec_set_i16(nc, (int16_t *)(data + i*n1), value); + } + } break; + case GGML_TYPE_I32: + { + assert(tensor->nb[0] == sizeof(int32_t)); + for (int i = 0; i < n; i++) { + ggml_vec_set_i32(nc, (int32_t *)(data + i*n1), value); + } + } break; + case GGML_TYPE_F16: + { + assert(tensor->nb[0] == sizeof(ggml_fp16_t)); + for (int i = 0; i < n; i++) { + ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), value); + } + } break; + case GGML_TYPE_F32: + { + assert(tensor->nb[0] == sizeof(float)); + for (int i = 0; i < n; i++) { + ggml_vec_set_f32(nc, (float *)(data + i*n1), value); + } + } break; + default: + { + GGML_ASSERT(false); + } break; + } + + return tensor; +} + +int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) { + switch (tensor->type) { + case GGML_TYPE_I8: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int8_t)); + return ((int8_t *)(tensor->data))[i]; + } break; + case GGML_TYPE_I16: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int16_t)); + return ((int16_t *)(tensor->data))[i]; + } break; + case GGML_TYPE_I32: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int32_t)); + return ((int32_t *)(tensor->data))[i]; + } break; + case GGML_TYPE_F16: + { + GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); + return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); + } break; + case GGML_TYPE_F32: + { + GGML_ASSERT(tensor->nb[0] == sizeof(float)); + return ((float *)(tensor->data))[i]; + } break; + default: + { + GGML_ASSERT(false); + } break; + } + + return 0.0f; +} + +void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) { + switch (tensor->type) { + case GGML_TYPE_I8: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int8_t)); + ((int8_t *)(tensor->data))[i] = value; + } break; + case GGML_TYPE_I16: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int16_t)); + ((int16_t *)(tensor->data))[i] = value; + } break; + case GGML_TYPE_I32: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int32_t)); + ((int32_t *)(tensor->data))[i] = value; + } break; + case GGML_TYPE_F16: + { + GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); + ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value); + } break; + case GGML_TYPE_F32: + { + GGML_ASSERT(tensor->nb[0] == sizeof(float)); + ((float *)(tensor->data))[i] = value; + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) { + switch (tensor->type) { + case GGML_TYPE_I8: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int8_t)); + return ((int8_t *)(tensor->data))[i]; + } break; + case GGML_TYPE_I16: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int16_t)); + return ((int16_t *)(tensor->data))[i]; + } break; + case GGML_TYPE_I32: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int32_t)); + return ((int32_t *)(tensor->data))[i]; + } break; + case GGML_TYPE_F16: + { + GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); + return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); + } break; + case GGML_TYPE_F32: + { + GGML_ASSERT(tensor->nb[0] == sizeof(float)); + return ((float *)(tensor->data))[i]; + } break; + default: + { + GGML_ASSERT(false); + } break; + } + + return 0.0f; +} + +void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) { + switch (tensor->type) { + case GGML_TYPE_I8: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int8_t)); + ((int8_t *)(tensor->data))[i] = value; + } break; + case GGML_TYPE_I16: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int16_t)); + ((int16_t *)(tensor->data))[i] = value; + } break; + case GGML_TYPE_I32: + { + GGML_ASSERT(tensor->nb[0] == sizeof(int32_t)); + ((int32_t *)(tensor->data))[i] = value; + } break; + case GGML_TYPE_F16: + { + GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); + ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value); + } break; + case GGML_TYPE_F32: + { + GGML_ASSERT(tensor->nb[0] == sizeof(float)); + ((float *)(tensor->data))[i] = value; + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +void * ggml_get_data(const struct ggml_tensor * tensor) { + return tensor->data; +} + +float * ggml_get_data_f32(const struct ggml_tensor * tensor) { + assert(tensor->type == GGML_TYPE_F32); + return (float *)(tensor->data); +} + +const char * ggml_get_name(const struct ggml_tensor * tensor) { + return tensor->name; +} + +void ggml_set_name(struct ggml_tensor * tensor, const char * name) { + strncpy(tensor->name, name, sizeof(tensor->name)); + tensor->name[sizeof(tensor->name) - 1] = '\0'; +} + +struct ggml_tensor * ggml_view_tensor( + struct ggml_context * ctx, + const struct ggml_tensor * src) { + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src->data); + + result->nb[0] = src->nb[0]; + result->nb[1] = src->nb[1]; + result->nb[2] = src->nb[2]; + result->nb[3] = src->nb[3]; + + return result; +} + +struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) { + struct ggml_object * obj = ctx->objects_begin; + + char * const mem_buffer = ctx->mem_buffer; + + while (obj != NULL) { + struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs); + if (strcmp(cur->name, name) == 0) { + return cur; + } + + obj = obj->next; + } + + return NULL; +} + +//////////////////////////////////////////////////////////////////////////////// + +// ggml_dup + +struct ggml_tensor * ggml_dup_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_DUP; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_dup( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_dup_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_dup_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_dup_impl(ctx, a, true); +} + +// ggml_add + +struct ggml_tensor * ggml_add_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + bool inplace) { + GGML_ASSERT(ggml_are_same_shape(a, b)); + + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_ADD; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_add( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_add_impl(ctx, a, b, false); +} + +struct ggml_tensor * ggml_add_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_add_impl(ctx, a, b, true); +} + +// ggml_add1 + +struct ggml_tensor * ggml_add1_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + bool inplace) { + GGML_ASSERT(ggml_is_scalar(b)); + GGML_ASSERT(ggml_is_padded_1d(a)); + + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_ADD1; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_add1( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_add1_impl(ctx, a, b, false); +} + +struct ggml_tensor * ggml_add1_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_add1_impl(ctx, a, b, true); +} + +// ggml_acc + +struct ggml_tensor * ggml_acc_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset, + bool inplace) { + GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a)); + GGML_ASSERT(ggml_is_contiguous(a)); + GGML_ASSERT(a->type == GGML_TYPE_F32); + GGML_ASSERT(b->type == GGML_TYPE_F32); + + bool is_node = false; + + if (!inplace && (a->grad || b->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + ggml_scratch_save(ctx); + + struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5); + + ((int32_t *) c->data)[0] = nb1; + ((int32_t *) c->data)[1] = nb2; + ((int32_t *) c->data)[2] = nb3; + ((int32_t *) c->data)[3] = offset; + ((int32_t *) c->data)[4] = inplace ? 1 : 0; + + ggml_scratch_load(ctx); + + result->op = GGML_OP_ACC; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + result->opt[0] = c; + + return result; +} + +struct ggml_tensor * ggml_acc( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset) { + return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false); +} + +struct ggml_tensor * ggml_acc_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset) { + return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true); +} + +// ggml_sub + +struct ggml_tensor * ggml_sub_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + bool inplace) { + GGML_ASSERT(ggml_are_same_shape(a, b)); + + bool is_node = false; + + if (!inplace && (a->grad || b->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SUB; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_sub( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_sub_impl(ctx, a, b, false); +} + +struct ggml_tensor * ggml_sub_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_sub_impl(ctx, a, b, true); +} + +// ggml_mul + +struct ggml_tensor * ggml_mul_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + bool inplace) { + // TODO: support less-strict constraint + // GGML_ASSERT(ggml_can_repeat(b, a)); + GGML_ASSERT(ggml_can_repeat_rows(b, a)); + + bool is_node = false; + + if (!inplace && (a->grad || b->grad)) { + // TODO: support backward pass for broadcasting + GGML_ASSERT(ggml_are_same_shape(a, b)); + is_node = true; + } + + if (inplace) { + GGML_ASSERT(is_node == false); + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_MUL; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_mul( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_mul_impl(ctx, a, b, false); +} + +struct ggml_tensor * ggml_mul_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_mul_impl(ctx, a, b, true); +} + +// ggml_div + +struct ggml_tensor * ggml_div_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + bool inplace) { + GGML_ASSERT(ggml_are_same_shape(a, b)); + + bool is_node = false; + + if (!inplace && (a->grad || b->grad)) { + is_node = true; + } + + if (inplace) { + GGML_ASSERT(is_node == false); + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_DIV; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_div( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_div_impl(ctx, a, b, false); +} + +struct ggml_tensor * ggml_div_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_div_impl(ctx, a, b, true); +} + +// ggml_sqr + +struct ggml_tensor * ggml_sqr_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SQR; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_sqr( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_sqr_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_sqr_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_sqr_impl(ctx, a, true); +} + +// ggml_sqrt + +struct ggml_tensor * ggml_sqrt_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SQRT; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_sqrt( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_sqrt_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_sqrt_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_sqrt_impl(ctx, a, true); +} + + +// ggml_log + +struct ggml_tensor * ggml_log_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_LOG; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_log( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_log_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_log_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_log_impl(ctx, a, true); +} + +// ggml_sum + +struct ggml_tensor * ggml_sum( + struct ggml_context * ctx, + struct ggml_tensor * a) { + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1); + + result->op = GGML_OP_SUM; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + + +// ggml_sum_rows + +struct ggml_tensor * ggml_sum_rows( + struct ggml_context * ctx, + struct ggml_tensor * a) { + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + int64_t ne[4] = {1,1,1,1}; + for (int i=1; in_dims; ++i) { + ne[i] = a->ne[i]; + } + + struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, a->n_dims, ne); + + result->op = GGML_OP_SUM_ROWS; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +// ggml_mean + +struct ggml_tensor * ggml_mean( + struct ggml_context * ctx, + struct ggml_tensor * a) { + bool is_node = false; + + if (a->grad) { + GGML_ASSERT(false); // TODO: implement + is_node = true; + } + + int64_t ne[GGML_MAX_DIMS] = { 1, a->ne[1], a->ne[2], a->ne[3] }; + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, ne); + + result->op = GGML_OP_MEAN; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +// ggml_repeat + +struct ggml_tensor * ggml_repeat( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_can_repeat(a, b)); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + if (ggml_are_same_shape(a, b) && !is_node) { + return a; + } + + struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne); + + result->op = GGML_OP_REPEAT; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_repeat_back + +struct ggml_tensor * ggml_repeat_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_can_repeat(b, a)); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + if (ggml_are_same_shape(a, b) && !is_node) { + return a; + } + + struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne); + + result->op = GGML_OP_REPEAT_BACK; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +#include "ggml/ggml-ggllm.c" + +// ggml_abs + +struct ggml_tensor * ggml_abs_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_ABS; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_abs( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_abs_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_abs_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_abs_impl(ctx, a, true); +} + + +// ggml_sgn + +struct ggml_tensor * ggml_sgn_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SGN; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_sgn( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_sgn_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_sgn_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_sgn_impl(ctx, a, true); +} + +// ggml_neg + +struct ggml_tensor * ggml_neg_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_NEG; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_neg( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_neg_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_neg_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_neg_impl(ctx, a, true); +} + +// ggml_step + +struct ggml_tensor * ggml_step_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_STEP; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_step( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_step_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_step_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_step_impl(ctx, a, true); +} + +// ggml_relu + +struct ggml_tensor * ggml_relu_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_RELU; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_relu( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_relu_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_relu_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_relu_impl(ctx, a, true); +} + +// ggml_gelu + +struct ggml_tensor * ggml_gelu_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_GELU; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_gelu( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_gelu_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_gelu_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_gelu_impl(ctx, a, true); +} + +// ggml_silu + +struct ggml_tensor * ggml_silu_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SILU; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_silu( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_silu_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_silu_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_silu_impl(ctx, a, true); +} + +// ggml_silu_back + +struct ggml_tensor * ggml_silu_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + bool is_node = false; + + if (a->grad || b->grad) { + // TODO: implement backward + is_node = true; + } + + struct ggml_tensor * result = ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SILU_BACK; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_norm + +struct ggml_tensor * ggml_norm_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_NORM; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; // TODO: maybe store epsilon here? + + return result; +} + +struct ggml_tensor * ggml_norm( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_norm_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_norm_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_norm_impl(ctx, a, true); +} + +struct ggml_tensor * ggml_rms_norm_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_RMS_NORM; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; // TODO: maybe store epsilon here? + + return result; +} + +struct ggml_tensor * ggml_rms_norm( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_rms_norm_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_rms_norm_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_rms_norm_impl(ctx, a, true); +} + +struct ggml_tensor * ggml_rms_norm_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + bool is_node = false; + + if (a->grad) { + // TODO: implement backward + is_node = true; + } + + struct ggml_tensor * result = ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_RMS_NORM_BACK; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + + +// ggml_mul_mat + +struct ggml_tensor * ggml_mul_mat( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_can_mul_mat(a, b)); + GGML_ASSERT(!ggml_is_transposed(a)); + + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + const int64_t ne[4] = { a->ne[1], b->ne[1], a->ne[2], b->ne[3] }; + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MIN(a->n_dims, b->n_dims), ne); + + result->op = GGML_OP_MUL_MAT; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_out_prod + +struct ggml_tensor * ggml_out_prod( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_can_out_prod(a, b)); + GGML_ASSERT(!ggml_is_transposed(a)); + + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + const int64_t ne[4] = { a->ne[0], b->ne[0], a->ne[2], b->ne[3] }; + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MIN(a->n_dims, b->n_dims), ne); + + result->op = GGML_OP_OUT_PROD; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_scale + +struct ggml_tensor * ggml_scale_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + bool inplace) { + GGML_ASSERT(ggml_is_scalar(b)); + GGML_ASSERT(ggml_is_padded_1d(a)); + + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SCALE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_scale( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_scale_impl(ctx, a, b, false); +} + +struct ggml_tensor * ggml_scale_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_scale_impl(ctx, a, b, true); +} + +// ggml_set + +struct ggml_tensor * ggml_set_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset, + bool inplace) { + GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b)); + + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + // make a view of the destination + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + ggml_scratch_save(ctx); + + struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5); + + (( int32_t * ) c->data)[0] = nb1; + (( int32_t * ) c->data)[1] = nb2; + (( int32_t * ) c->data)[2] = nb3; + (( int32_t * ) c->data)[3] = offset; + (( int32_t * ) c->data)[4] = inplace ? 1 : 0; + + ggml_scratch_load(ctx); + + result->op = GGML_OP_SET; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + result->opt[0] = c; + + return result; +} + +struct ggml_tensor * ggml_set( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset) { + return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false); +} + +struct ggml_tensor * ggml_set_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset) { + return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true); +} + +struct ggml_tensor * ggml_set_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t offset) { + return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false); +} + +struct ggml_tensor * ggml_set_1d_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t offset) { + return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true); +} + +struct ggml_tensor * ggml_set_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t offset) { + return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false); +} + +struct ggml_tensor * ggml_set_2d_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t offset) { + return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false); +} + + +// ggml_cpy + +struct ggml_tensor * ggml_cpy_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + bool inplace) { + GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b)); + + bool is_node = false; + + if (!inplace && (a->grad || b->grad)) { + is_node = true; + } + + // make a view of the destination + struct ggml_tensor * result = ggml_view_tensor(ctx, b); + + result->op = GGML_OP_CPY; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_cpy( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_cpy_impl(ctx, a, b, false); +} + +struct ggml_tensor * ggml_cpy_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_cpy_impl(ctx, a, b, true); +} + +// ggml_cont + +struct ggml_tensor * ggml_cont_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && a->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_CONT; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_cont( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_cont_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_cont_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_cont_impl(ctx, a, true); +} + +// ggml_reshape + +struct ggml_tensor * ggml_reshape( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_is_contiguous(a)); + GGML_ASSERT(ggml_is_contiguous(b)); + GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b)); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + if (b->grad) { + // gradient propagation is not supported + //GGML_ASSERT(false); + } + + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a->data); + + result->op = GGML_OP_RESHAPE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_reshape_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0) { + GGML_ASSERT(ggml_is_contiguous(a)); + GGML_ASSERT(ggml_nelements(a) == ne0); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + const int64_t ne[1] = { ne0 }; + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a->data); + + result->op = GGML_OP_RESHAPE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_reshape_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1) { + GGML_ASSERT(ggml_is_contiguous(a)); + GGML_ASSERT(ggml_nelements(a) == ne0*ne1); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + const int64_t ne[2] = { ne0, ne1 }; + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a->data); + + result->op = GGML_OP_RESHAPE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_reshape_3d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2) { + GGML_ASSERT(ggml_is_contiguous(a)); + GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + const int64_t ne[3] = { ne0, ne1, ne2 }; + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a->data); + + result->op = GGML_OP_RESHAPE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + + +struct ggml_tensor * ggml_reshape_4d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3) { + GGML_ASSERT(ggml_is_contiguous(a)); + GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a->data); + + result->op = GGML_OP_RESHAPE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +// ggml_view_1d + +struct ggml_tensor * ggml_view_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + size_t offset) { + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset); + + ggml_scratch_save(ctx); + + struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2); + memcpy(offs->data, &offset, 2*sizeof(int32_t)); + + ggml_scratch_load(ctx); + + result->op = GGML_OP_VIEW; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + result->opt[0] = offs; + + return result; +} + +// ggml_view_2d + +struct ggml_tensor * ggml_view_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + size_t nb1, + size_t offset) { + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 }; + + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset); + + ggml_scratch_save(ctx); + + struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2); + memcpy(offs->data, &offset, 2*sizeof(int32_t)); + + ggml_scratch_load(ctx); + + result->nb[1] = nb1; + result->nb[2] = result->nb[1]*ne1; + result->nb[3] = result->nb[2]; + + result->op = GGML_OP_VIEW; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + result->opt[0] = offs; + + return result; +} + +// ggml_view_3d + +struct ggml_tensor * ggml_view_3d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + size_t nb1, + size_t nb2, + size_t offset) { + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 }; + + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset); + + ggml_scratch_save(ctx); + + struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2); + memcpy(offs->data, &offset, 2*sizeof(int32_t)); + + ggml_scratch_load(ctx); + + result->nb[1] = nb1; + result->nb[2] = nb2; + result->nb[3] = result->nb[2]*ne2; + + result->op = GGML_OP_VIEW; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + result->opt[0] = offs; + + return result; +} + +// ggml_view_4d + +struct ggml_tensor * ggml_view_4d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset) { + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 }; + + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset); + + ggml_scratch_save(ctx); + + struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2); + memcpy(offs->data, &offset, 2*sizeof(int32_t)); + + ggml_scratch_load(ctx); + + result->nb[1] = nb1; + result->nb[2] = nb2; + result->nb[3] = nb3; + + result->op = GGML_OP_VIEW; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + result->opt[0] = offs; + + return result; +} + +// ggml_permute + +struct ggml_tensor * ggml_permute( + struct ggml_context * ctx, + struct ggml_tensor * a, + int axis0, + int axis1, + int axis2, + int axis3) { + GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS); + GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS); + GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS); + GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS); + + GGML_ASSERT(axis0 != axis1); + GGML_ASSERT(axis0 != axis2); + GGML_ASSERT(axis0 != axis3); + GGML_ASSERT(axis1 != axis2); + GGML_ASSERT(axis1 != axis3); + GGML_ASSERT(axis2 != axis3); + + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + struct ggml_tensor * result = ggml_view_tensor(ctx, a); + + int ne[GGML_MAX_DIMS]; + int nb[GGML_MAX_DIMS]; + + ne[axis0] = a->ne[0]; + ne[axis1] = a->ne[1]; + ne[axis2] = a->ne[2]; + ne[axis3] = a->ne[3]; + + nb[axis0] = a->nb[0]; + nb[axis1] = a->nb[1]; + nb[axis2] = a->nb[2]; + nb[axis3] = a->nb[3]; + + result->ne[0] = ne[0]; + result->ne[1] = ne[1]; + result->ne[2] = ne[2]; + result->ne[3] = ne[3]; + + result->nb[0] = nb[0]; + result->nb[1] = nb[1]; + result->nb[2] = nb[2]; + result->nb[3] = nb[3]; + + result->op = GGML_OP_PERMUTE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + if (is_node) { + ggml_scratch_save(ctx); + + struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4); + + ((int32_t *) b->data)[0] = axis0; + ((int32_t *) b->data)[1] = axis1; + ((int32_t *) b->data)[2] = axis2; + ((int32_t *) b->data)[3] = axis3; + + ggml_scratch_load(ctx); + + result->opt[0] = b; + } + + return result; +} + +// ggml_transpose + +struct ggml_tensor * ggml_transpose( + struct ggml_context * ctx, + struct ggml_tensor * a) { + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + struct ggml_tensor * result = ggml_view_tensor(ctx, a); + + result->ne[0] = a->ne[1]; + result->ne[1] = a->ne[0]; + + result->nb[0] = a->nb[1]; + result->nb[1] = a->nb[0]; + + result->op = GGML_OP_TRANSPOSE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +// ggml_get_rows + +struct ggml_tensor * ggml_get_rows( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32); + + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + // TODO: implement non F32 return + //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]); + struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, a->ne[0], b->ne[0]); + + result->op = GGML_OP_GET_ROWS; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_get_rows_back + +struct ggml_tensor * ggml_get_rows_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c) { + GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32); + GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0])); + + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + // TODO: implement non F32 return + //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]); + struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]); + + result->op = GGML_OP_GET_ROWS_BACK; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + result->opt[0] = c; + + return result; +} + +// ggml_diag + +struct ggml_tensor * ggml_diag( + struct ggml_context * ctx, + struct ggml_tensor * a) { + GGML_ASSERT(a->ne[1] == 1); + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] }; + struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, MAX(a->n_dims, 2), ne); + + result->op = GGML_OP_DIAG; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + + +// ggml_diag_mask_inf + +struct ggml_tensor * ggml_diag_mask_inf_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + bool inplace) { + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + ggml_scratch_save(ctx); + + struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2); + + ((int32_t *) b->data)[0] = n_past; + ((int32_t *) b->data)[1] = inplace ? 1 : 0; + + ggml_scratch_load(ctx); + + result->op = GGML_OP_DIAG_MASK_INF; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_diag_mask_inf( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past) { + return ggml_diag_mask_inf_impl(ctx, a, n_past, false); +} + + +struct ggml_tensor * ggml_diag_mask_inf_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past) { + return ggml_diag_mask_inf_impl(ctx, a, n_past, true); +} + +// ggml_diag_mask_zero + +struct ggml_tensor * ggml_diag_mask_zero_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + bool inplace) { + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + ggml_scratch_save(ctx); + + struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2); + ggml_set_name(b, "n_past, inplace"); + + ((int32_t *) b->data)[0] = n_past; + ((int32_t *) b->data)[1] = inplace ? 1 : 0; + + ggml_scratch_load(ctx); + + result->op = GGML_OP_DIAG_MASK_ZERO; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_diag_mask_zero( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past) { + return ggml_diag_mask_zero_impl(ctx, a, n_past, false); +} + +struct ggml_tensor * ggml_diag_mask_zero_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past) { + return ggml_diag_mask_zero_impl(ctx, a, n_past, true); +} + +// ggml_soft_max + +struct ggml_tensor * ggml_soft_max_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SOFT_MAX; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; + + return result; +} + +struct ggml_tensor * ggml_soft_max( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_soft_max_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_soft_max_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_soft_max_impl(ctx, a, true); +} + + +// ggml_soft_max_back + +struct ggml_tensor * ggml_soft_max_back_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + bool inplace) { + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; // TODO : implement backward pass + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_SOFT_MAX_BACK; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_soft_max_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_soft_max_back_impl(ctx, a, b, false); +} + +struct ggml_tensor * ggml_soft_max_back_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_soft_max_back_impl(ctx, a, b, true); +} + +// ggml_rope + +struct ggml_tensor * ggml_rope_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_dims, + int mode, + bool inplace) { + GGML_ASSERT(n_past >= 0); + bool is_node = false; + + if (a->grad) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + ggml_scratch_save(ctx); + + struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3); + + ((int32_t *) b->data)[0] = n_past; + ((int32_t *) b->data)[1] = n_dims; + ((int32_t *) b->data)[2] = mode; + + ggml_scratch_load(ctx); + + result->op = GGML_OP_ROPE; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +struct ggml_tensor * ggml_rope( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_dims, + int mode) { + return ggml_rope_impl(ctx, a, n_past, n_dims, mode, false); +} + +struct ggml_tensor * ggml_rope_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_dims, + int mode) { + return ggml_rope_impl(ctx, a, n_past, n_dims, mode, true); +} + +// ggml_rope_back + +struct ggml_tensor * ggml_rope_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_dims, + int mode) { + GGML_ASSERT(n_past >= 0); + bool is_node = false; + + if (a->grad) { + is_node = false; // TODO: implement backward + } + + struct ggml_tensor * result = ggml_dup_tensor(ctx, a); + + ggml_scratch_save(ctx); + + struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3); + ggml_set_name(b, "n_past, n_dims, mode"); + + ((int32_t *) b->data)[0] = n_past; + ((int32_t *) b->data)[1] = n_dims; + ((int32_t *) b->data)[2] = mode; + + ggml_scratch_load(ctx); + + result->op = GGML_OP_ROPE_BACK; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_alibi + +struct ggml_tensor * ggml_alibi( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_head, + float bias_max) { + GGML_ASSERT(n_past >= 0); + bool is_node = false; + + if (a->grad) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + // TODO: when implement backward, fix this: + //struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + struct ggml_tensor * result = ggml_view_tensor(ctx, a); + + ggml_scratch_save(ctx); + + struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3); + + ((int32_t *) b->data)[0] = n_past; + ((int32_t *) b->data)[1] = n_head; + GGML_ASSERT(sizeof(float) == sizeof(int32_t)); + (((float *) b->data)[2]) = bias_max; + + ggml_scratch_load(ctx); + + result->op = GGML_OP_ALIBI; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_clamp + +struct ggml_tensor * ggml_clamp( + struct ggml_context * ctx, + struct ggml_tensor * a, + float min, + float max) { + bool is_node = false; + + if (a->grad) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + // TODO: when implement backward, fix this: + struct ggml_tensor * result = ggml_view_tensor(ctx, a); + + ggml_scratch_save(ctx); + + struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3); + + ((float *) b->data)[0] = min; + ((float *) b->data)[1] = max; + + ggml_scratch_load(ctx); + + result->op = GGML_OP_CLAMP; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_conv_1d_1s + +struct ggml_tensor * ggml_conv_1d_1s( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_is_matrix(b)); + GGML_ASSERT(a->ne[1] == b->ne[1]); + GGML_ASSERT(a->ne[3] == 1); + bool is_node = false; + + if (a->grad || b->grad) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + const int64_t ne[4] = { b->ne[0], a->ne[2], 1, 1, }; + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); + + result->op = GGML_OP_CONV_1D_1S; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_conv_1d_2s + +struct ggml_tensor * ggml_conv_1d_2s( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_is_matrix(b)); + GGML_ASSERT(a->ne[1] == b->ne[1]); + GGML_ASSERT(a->ne[3] == 1); + bool is_node = false; + + if (a->grad || b->grad) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + const int64_t ne[4] = { b->ne[0]/2, a->ne[2], 1, 1, }; + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); + + result->op = GGML_OP_CONV_1D_2S; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_flash_attn + +struct ggml_tensor * ggml_flash_attn( + struct ggml_context * ctx, + struct ggml_tensor * q, + struct ggml_tensor * k, + struct ggml_tensor * v, + bool masked) { + GGML_ASSERT(ggml_can_mul_mat(k, q)); + // TODO: check if vT can be multiplied by (k*qT) + + bool is_node = false; + + if (q->grad || k->grad || v->grad) { + is_node = true; + } + + //struct ggml_tensor * result = ggml_dup_tensor(ctx, q); + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, q->ne); + + result->op = GGML_OP_FLASH_ATTN; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = q; + result->src1 = k; + result->opt[0] = v; + result->opt[1] = ggml_new_i32(ctx, masked ? 1 : 0); + + return result; +} + +// ggml_flash_ff + +struct ggml_tensor * ggml_flash_ff( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b0, + struct ggml_tensor * b1, + struct ggml_tensor * c0, + struct ggml_tensor * c1) { + GGML_ASSERT(ggml_can_mul_mat(b0, a)); + // TODO: more checks + + bool is_node = false; + + if (a->grad || b0->grad || b1->grad || c0->grad || c1->grad) { + is_node = true; + } + + //struct ggml_tensor * result = ggml_dup_tensor(ctx, a); + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, a->ne); + + result->op = GGML_OP_FLASH_FF; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b0; + result->opt[0] = b1; + result->opt[1] = c0; + result->opt[2] = c1; + + return result; +} + +// ggml_flash_attn_back + +struct ggml_tensor * ggml_flash_attn_back( + struct ggml_context * ctx, + struct ggml_tensor * q, + struct ggml_tensor * k, + struct ggml_tensor * v, + struct ggml_tensor * d, + bool masked) { + GGML_ASSERT(ggml_can_mul_mat(k, q)); + // TODO: check if vT can be multiplied by (k*qT) + + // d shape [D,N,ne2,ne3] + // q shape [D,N,ne2,ne3] + // k shape [D,M,ne2,ne3] + // v shape [M,D,ne2,ne3] + + const int64_t D = q->ne[0]; + const int64_t N = q->ne[1]; + const int64_t M = k->ne[1]; + const int64_t ne2 = q->ne[2]; + const int64_t ne3 = q->ne[3]; + + GGML_ASSERT(k->ne[0] == D); + GGML_ASSERT(v->ne[0] == M); + GGML_ASSERT(v->ne[1] == D); + GGML_ASSERT(d->ne[0] == D); + GGML_ASSERT(d->ne[1] == N); + GGML_ASSERT(k->ne[2] == ne2); + GGML_ASSERT(k->ne[3] == ne3); + GGML_ASSERT(v->ne[2] == ne2); + GGML_ASSERT(v->ne[3] == ne3); + GGML_ASSERT(d->ne[2] == ne2); + GGML_ASSERT(d->ne[3] == ne3); + + bool is_node = false; + + if (q->grad || k->grad || v->grad) { + // when using this operation (in backwards pass) these grads are set. + // we don't want to create (big) grad of our result, so is_node is false. + is_node = false; + } + + // store gradients of q, k and v as continuous tensors concatenated in result. + // q shape[D,N,ne2,ne3] ; k shape [D,M,ne2,ne3] ; v shape [M,D,ne2,ne3] + // gradq->data = result->data + // gradk->data = result->data + nb0*D*N*ne2*ne3 + // gradv->data = result->data + nb0*D*N*ne2*ne3 + nb0*D*M*ne2*ne3 + // note: v and gradv are actually transposed, i.e. v->ne[0] != D. + int64_t ne[4] = {D,M+N+M,ne2,ne3}; + + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); + + result->op = GGML_OP_FLASH_ATTN_BACK; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = q; + result->src1 = k; + result->opt[0] = v; + result->opt[1] = d; + result->opt[2] = ggml_new_i32(ctx, masked ? 1 : 0); + + return result; +} + + +// ggml_map_unary + +struct ggml_tensor * ggml_map_unary_impl_f32( + struct ggml_context * ctx, + struct ggml_tensor * a, + const ggml_unary_op_f32_t fun, + bool inplace) { + bool is_node = false; + + if (!inplace && a->grad) { + is_node = true; + } + + struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t)); + *((void (**)(void))addr_tensor->data) = (void (*)(void))fun; + struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_MAP_UNARY; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->opt[0] = addr_tensor; + + return result; +} + +struct ggml_tensor * ggml_map_unary_f32( + struct ggml_context * ctx, + struct ggml_tensor * a, + const ggml_unary_op_f32_t fun) { + return ggml_map_unary_impl_f32(ctx, a, fun, false); +} + +struct ggml_tensor * ggml_map_unary_inplace_f32( + struct ggml_context * ctx, + struct ggml_tensor * a, + const ggml_unary_op_f32_t fun) { + return ggml_map_unary_impl_f32(ctx, a, fun, true); +} + +// ggml_map_binary + +struct ggml_tensor * ggml_map_binary_impl_f32( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + const ggml_binary_op_f32_t fun, + bool inplace) { + GGML_ASSERT(ggml_are_same_shape(a, b)); + + bool is_node = false; + + if (!inplace && (a->grad || b->grad)) { + is_node = true; + } + + struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t)); + *((void (**)(void))addr_tensor->data) = (void (*)(void))fun; + struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_MAP_BINARY; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + result->opt[0] = addr_tensor; + + return result; +} + +struct ggml_tensor * ggml_map_binary_f32( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + const ggml_binary_op_f32_t fun) { + return ggml_map_binary_impl_f32(ctx, a, b, fun, false); +} + +struct ggml_tensor * ggml_map_binary_inplace_f32( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + const ggml_binary_op_f32_t fun) { + return ggml_map_binary_impl_f32(ctx, a, b, fun, true); +} + +// ggml_cross_entropy_loss + +struct ggml_tensor * ggml_cross_entropy_loss( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + GGML_ASSERT(ggml_are_same_shape(a, b)); + bool is_node = false; + + if (a->grad || b->grad) { + is_node = true; + } + + struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1); + + result->op = GGML_OP_CROSS_ENTROPY_LOSS; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + +// ggml_cross_entropy_loss_back + +struct ggml_tensor * ggml_cross_entropy_loss_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c) { + GGML_ASSERT(ggml_are_same_shape(a, b)); + GGML_ASSERT(ggml_is_scalar(c)); + + struct ggml_tensor * result = ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK; + result->grad = NULL; + result->src0 = a; + result->src1 = b; + result->opt[0] = c; + + return result; +} + +//////////////////////////////////////////////////////////////////////////////// + +void ggml_set_param( + struct ggml_context * ctx, + struct ggml_tensor * tensor) { + tensor->is_param = true; + + GGML_ASSERT(tensor->grad == NULL); + tensor->grad = ggml_dup_tensor(ctx, tensor); +} + +// ggml_compute_forward_dup + +static void ggml_compute_forward_dup_same_cont( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0)); + GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == dst->type); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const size_t nb00 = src0->nb[0]; + const size_t nb0 = dst->nb[0]; + + const int ith = params->ith; // thread index + const int nth = params->nth; // number of threads + + // parallelize by elements + const int ne = ggml_nelements(dst); + const int dr = (ne + nth - 1) / nth; + const int ie0 = dr * ith; + const int ie1 = MIN(ie0 + dr, ne); + + if (ie0 < ie1) { + memcpy( + ((char *) dst->data + ie0*nb0), + ((char *) src0->data + ie0*nb00), + (ie1 - ie0) * GGML_TYPE_SIZE[src0->type]); + } + +} +static void ggml_compute_forward_dup_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const int ith = params->ith; // thread index + const int nth = params->nth; // number of threads + + if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { + ggml_compute_forward_dup_same_cont(params, src0, dst); + return; + } + + // parallelize by rows + const int nr = ne01; + // number of rows per thread + const int dr = (nr + nth - 1) / nth; + // row range for this thread + const int ir0 = dr * ith; + const int ir1 = MIN(ir0 + dr, nr); + + if (src0->type == dst->type && + ne00 == ne0 && + nb00 == GGML_TYPE_SIZE[src0->type] && nb0 == GGML_TYPE_SIZE[dst->type]) { + // copy by rows + const size_t rs = ne00*nb00; + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = ir0; i01 < ir1; i01++) { + memcpy( + ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3), + ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03), + rs); + } + } + } + return; + } + + // TODO: add more special-case implementations for tensor shapes/strides that can benefit from memcpy + + if (ggml_is_contiguous(dst)) { + if (nb00 == sizeof(ggml_fp16_t)) { + if (dst->type == GGML_TYPE_F16) { + size_t id = 0; + const size_t rs = ne00 * nb00; + char * dst_ptr = (char *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += rs * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + const char * src0_ptr = (char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03; + memcpy(dst_ptr + id, src0_ptr, rs); + id += rs; + } + id += rs * (ne01 - ir1); + } + } + } else if (dst->type == GGML_TYPE_F32) { + size_t id = 0; + float * dst_ptr = (float *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += ne00 * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); + for (int i00 = 0; i00 < ne00; i00++) { + dst_ptr[id] = GGML_FP16_TO_FP32(src0_ptr[i00]); + id++; + } + } + id += ne00 * (ne01 - ir1); + } + } + } else if (ggml_is_quantized(dst->type)) { + quantize_row_q_t const quantize_row_q = quantize_fns[dst->type].quantize_row_q; + float * src0_f32 = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith; + + size_t id = 0; + size_t rs = nb0 * (ne00 / GGML_BLCK_SIZE[dst->type]); + char * dst_ptr = (char *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += rs * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); + + for (int i00 = 0; i00 < ne00; i00++) { + src0_f32[i00] = GGML_FP16_TO_FP32(src0_ptr[i00]); + } + + quantize_row_q(src0_f32, dst_ptr + id, ne00); + id += rs; + } + id += rs * (ne01 - ir1); + } + } + } else { + GGML_ASSERT(false); // TODO: implement + } + } else { + //printf("%s: this is not optimal - fix me\n", __func__); + + if (dst->type == GGML_TYPE_F32) { + size_t id = 0; + float * dst_ptr = (float *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += ne00 * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + for (int i00 = 0; i00 < ne00; i00++) { + const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + + dst_ptr[id] = GGML_FP16_TO_FP32(*src0_ptr); + id++; + } + } + id += ne00 * (ne01 - ir1); + } + } + } else if (dst->type == GGML_TYPE_F16) { + size_t id = 0; + ggml_fp16_t * dst_ptr = (ggml_fp16_t *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += ne00 * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + for (int i00 = 0; i00 < ne00; i00++) { + const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + + dst_ptr[id] = *src0_ptr; + id++; + } + } + id += ne00 * (ne01 - ir1); + } + } + } else { + GGML_ASSERT(false); // TODO: implement + } + } + return; + } + + // dst counters + int64_t i10 = 0; + int64_t i11 = 0; + int64_t i12 = 0; + int64_t i13 = 0; + + if (dst->type == GGML_TYPE_F16) { + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + i10 += ne00 * ir0; + while (i10 >= ne0) { + i10 -= ne0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + for (int64_t i01 = ir0; i01 < ir1; i01++) { + for (int64_t i00 = 0; i00 < ne00; i00++) { + const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); + + memcpy(dst_ptr, src0_ptr, sizeof(ggml_fp16_t)); + + if (++i10 == ne00) { + i10 = 0; + if (++i11 == ne01) { + i11 = 0; + if (++i12 == ne02) { + i12 = 0; + if (++i13 == ne03) { + i13 = 0; + } + } + } + } + } + } + i10 += ne00 * (ne01 - ir1); + while (i10 >= ne0) { + i10 -= ne0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + } + } + } else if (dst->type == GGML_TYPE_F32) { + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + i10 += ne00 * ir0; + while (i10 >= ne0) { + i10 -= ne0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + for (int64_t i01 = ir0; i01 < ir1; i01++) { + for (int64_t i00 = 0; i00 < ne00; i00++) { + const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); + + *(float *) dst_ptr = GGML_FP16_TO_FP32(*(const ggml_fp16_t *) src0_ptr); + + if (++i10 == ne0) { + i10 = 0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + } + } + i10 += ne00 * (ne01 - ir1); + while (i10 >= ne0) { + i10 -= ne0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + } + } + } else { + GGML_ASSERT(false); // TODO: implement + } +} + +static void ggml_compute_forward_dup_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const int ith = params->ith; // thread index + const int nth = params->nth; // number of threads + + if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { + ggml_compute_forward_dup_same_cont(params, src0, dst); + return; + } + + // parallelize by rows + const int nr = ne01; + // number of rows per thread + const int dr = (nr + nth - 1) / nth; + // row range for this thread + const int ir0 = dr * ith; + const int ir1 = MIN(ir0 + dr, nr); + + if (src0->type == dst->type && + ne00 == ne0 && + nb00 == GGML_TYPE_SIZE[src0->type] && nb0 == GGML_TYPE_SIZE[dst->type]) { + // copy by rows + const size_t rs = ne00*nb00; + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = ir0; i01 < ir1; i01++) { + memcpy( + ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3), + ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03), + rs); + } + } + } + return; + } + + if (ggml_is_contiguous(dst)) { + // TODO: simplify + if (nb00 == sizeof(float)) { + if (dst->type == GGML_TYPE_F32) { + size_t id = 0; + const size_t rs = ne00 * nb00; + char * dst_ptr = (char *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += rs * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + const char * src0_ptr = (char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03; + memcpy(dst_ptr + id, src0_ptr, rs); + id += rs; + } + id += rs * (ne01 - ir1); + } + } + } else if (dst->type == GGML_TYPE_F16) { + size_t id = 0; + ggml_fp16_t * dst_ptr = (ggml_fp16_t *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += ne00 * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + for (int i00 = 0; i00 < ne00; i00++) { + const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + + dst_ptr[id] = GGML_FP32_TO_FP16(*src0_ptr); + id++; + } + } + id += ne00 * (ne01 - ir1); + } + } + } else if (ggml_is_quantized(dst->type)) { + quantize_row_q_t const quantize_row_q = quantize_fns[dst->type].quantize_row_q; + + size_t id = 0; + size_t rs = nb0 * (ne00 / GGML_BLCK_SIZE[dst->type]); + char * dst_ptr = (char *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += rs * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + const float * src0_ptr = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); + quantize_row_q(src0_ptr, dst_ptr + id, ne00); + id += rs; + } + id += rs * (ne01 - ir1); + } + } + } else { + GGML_ASSERT(false); // TODO: implement + } + } else { + //printf("%s: this is not optimal - fix me\n", __func__); + + if (dst->type == GGML_TYPE_F32) { + size_t id = 0; + float * dst_ptr = (float *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += ne00 * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + for (int i00 = 0; i00 < ne00; i00++) { + const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + + dst_ptr[id] = *src0_ptr; + id++; + } + } + id += ne00 * (ne01 - ir1); + } + } + } else if (dst->type == GGML_TYPE_F16) { + size_t id = 0; + ggml_fp16_t * dst_ptr = (ggml_fp16_t *) dst->data; + + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + id += ne00 * ir0; + for (int i01 = ir0; i01 < ir1; i01++) { + for (int i00 = 0; i00 < ne00; i00++) { + const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + + dst_ptr[id] = GGML_FP32_TO_FP16(*src0_ptr); + id++; + } + } + id += ne00 * (ne01 - ir1); + } + } + } else { + GGML_ASSERT(false); // TODO: implement + } + } + + return; + } + + // dst counters + + int64_t i10 = 0; + int64_t i11 = 0; + int64_t i12 = 0; + int64_t i13 = 0; + + if (dst->type == GGML_TYPE_F32) { + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + i10 += ne00 * ir0; + while (i10 >= ne0) { + i10 -= ne0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + for (int64_t i01 = ir0; i01 < ir1; i01++) { + for (int64_t i00 = 0; i00 < ne00; i00++) { + const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); + + memcpy(dst_ptr, src0_ptr, sizeof(float)); + + if (++i10 == ne0) { + i10 = 0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + } + } + i10 += ne00 * (ne01 - ir1); + while (i10 >= ne0) { + i10 -= ne0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + } + } + } else if (dst->type == GGML_TYPE_F16) { + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + i10 += ne00 * ir0; + while (i10 >= ne0) { + i10 -= ne0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + for (int64_t i01 = ir0; i01 < ir1; i01++) { + for (int64_t i00 = 0; i00 < ne00; i00++) { + const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); + char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); + + *(ggml_fp16_t *) dst_ptr = GGML_FP32_TO_FP16(*(const float *) src0_ptr); + + if (++i10 == ne0) { + i10 = 0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + } + } + i10 += ne00 * (ne01 - ir1); + while (i10 >= ne0) { + i10 -= ne0; + if (++i11 == ne1) { + i11 = 0; + if (++i12 == ne2) { + i12 = 0; + if (++i13 == ne3) { + i13 = 0; + } + } + } + } + } + } + } else { + GGML_ASSERT(false); // TODO: implement + } +} + +static void ggml_compute_forward_dup( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { + ggml_compute_forward_dup_same_cont(params, src0, dst); + return; + } + switch (src0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_dup_f16(params, src0, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_dup_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_add + +static void ggml_compute_forward_add_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT( nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + if (nb10 == sizeof(float)) { + for (int ir = ir0; ir < ir1; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + +#ifdef GGML_USE_ACCELERATE + vDSP_vadd( + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1, + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1, + ne0); +#else + ggml_vec_add_f32(ne0, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); +#endif + // } + // } + } + } else { + // src1 is not contiguous + for (int ir = ir0; ir < ir1; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); + float * src0_ptr = (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + for (int i0 = 0; i0 < ne0; i0++) { + float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11 + i0*nb10); + + dst_ptr[i0] = src0_ptr[i0] + *src1_ptr; + } + } + } +} + +static void ggml_compute_forward_add_f16_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT(dst->type == GGML_TYPE_F16); + + GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + if (nb10 == sizeof(float)) { + for (int ir = ir0; ir < ir1; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); + ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); + + for (int i = 0; i < ne0; i++) { + dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]); + } + } + } + else { + // src1 is not contiguous + GGML_ASSERT(false); + } +} + +static void ggml_compute_forward_add_f16_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F16); + GGML_ASSERT(dst->type == GGML_TYPE_F16); + + GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + if (nb10 == sizeof(ggml_fp16_t)) { + for (int ir = ir0; ir < ir1; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); + ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); + + for (int i = 0; i < ne0; i++) { + dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + GGML_FP16_TO_FP32(src1_ptr[i])); + } + } + } + else { + // src1 is not contiguous + GGML_ASSERT(false); + } +} + +static void ggml_compute_forward_add_q_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int nr = ggml_nrows(src0); + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + //const int64_t ne03 = src0->ne[3]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const enum ggml_type type = src0->type; + dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q; + quantize_row_q_t const quantize_row_q = quantize_fns[type].quantize_row_q; + + // we don't support permuted src0 or src1 + GGML_ASSERT(nb00 == GGML_TYPE_SIZE[type]); + GGML_ASSERT(nb10 == sizeof(float)); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 <= nb1); + GGML_ASSERT(nb1 <= nb2); + GGML_ASSERT(nb2 <= nb3); + + GGML_ASSERT(ggml_is_quantized(src0->type)); + GGML_ASSERT(dst->type == src0->type); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + float * wdata = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith; + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 indices + const int i03 = ir/(ne02*ne01); + const int i02 = (ir - i03*ne02*ne01)/ne01; + const int i01 = (ir - i03*ne02*ne01 - i02*ne01); + + // src1 and dst are same shape as src0 => same indices + const int i13 = i03; + const int i12 = i02; + const int i11 = i01; + + const int i3 = i03; + const int i2 = i02; + const int i1 = i01; + + void * src0_row = (void *) ((char *) src0->data + (i01*nb01 + i02*nb02 + i03*nb03)); + float * src1_row = (float *)((char *) src1->data + (i11*nb11 + i12*nb12 + i13*nb13)); + void * dst_row = (void *) ((char *) dst->data + ( i1*nb1 + i2*nb2 + i3*nb3)); + + assert(ne00 % 32 == 0); + + // unquantize row from src0 to temp buffer + dequantize_row_q(src0_row, wdata, ne00); + // add src1 + ggml_vec_acc_f32(ne00, wdata, src1_row); + // quantize row to dst + quantize_row_q(wdata, dst_row, ne00); + } +} + +static void ggml_compute_forward_add( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_add_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F16: + { + if (src1->type == GGML_TYPE_F16) { + ggml_compute_forward_add_f16_f16(params, src0, src1, dst); + } + else if (src1->type == GGML_TYPE_F32) { + ggml_compute_forward_add_f16_f32(params, src0, src1, dst); + } + else { + GGML_ASSERT(false); + } + } break; + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + { + ggml_compute_forward_add_q_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_add1 + +static void ggml_compute_forward_add1_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_is_scalar(src1)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT( nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + +#ifdef GGML_USE_ACCELERATE + UNUSED(ggml_vec_add1_f32); + + vDSP_vadd( + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1, + (float *) ((char *) src1->data), 0, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1, + ne0); +#else + ggml_vec_add1_f32(ne0, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), + *(float *) src1->data); +#endif + } +} + +static void ggml_compute_forward_add1_f16_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_is_scalar(src1)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // scalar to add + const float v = *(float *) src1->data; + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT(dst->type == GGML_TYPE_F16); + + GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); + ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + for (int i = 0; i < ne0; i++) { + dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + v); + } + } +} + +static void ggml_compute_forward_add1_f16_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_is_scalar(src1)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // scalar to add + const float v = GGML_FP16_TO_FP32(*(ggml_fp16_t *) src1->data); + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F16); + GGML_ASSERT(dst->type == GGML_TYPE_F16); + + GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); + ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + for (int i = 0; i < ne0; i++) { + dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + v); + } + } +} + +static void ggml_compute_forward_add1_q_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_is_scalar(src1)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // scalar to add + const float v = *(float *) src1->data; + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const enum ggml_type type = src0->type; + dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q; + quantize_row_q_t const quantize_row_q = quantize_fns[type].quantize_row_q; + + // we don't support permuted src0 + GGML_ASSERT(nb00 == GGML_TYPE_SIZE[type]); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 <= nb1); + GGML_ASSERT(nb1 <= nb2); + GGML_ASSERT(nb2 <= nb3); + + GGML_ASSERT(ggml_is_quantized(src0->type)); + GGML_ASSERT(dst->type == src0->type); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + float * wdata = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32) * ith; + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + void * src0_row = (void *) ((char *) src0->data + (i1*nb01 + i2*nb02 + i3*nb03)); + void * dst_row = (void *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb0 )); + + assert(ne0 % 32 == 0); + + // unquantize row from src0 to temp buffer + dequantize_row_q(src0_row, wdata, ne0); + // add src1 + ggml_vec_acc1_f32(ne0, wdata, v); + // quantize row to dst + quantize_row_q(wdata, dst_row, ne0); + } +} + +static void ggml_compute_forward_add1( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_add1_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F16: + { + if (src1->type == GGML_TYPE_F16) { + ggml_compute_forward_add1_f16_f16(params, src0, src1, dst); + } + else if (src1->type == GGML_TYPE_F32) { + ggml_compute_forward_add1_f16_f32(params, src0, src1, dst); + } + else { + GGML_ASSERT(false); + } + } break; + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + { + ggml_compute_forward_add1_q_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + + +// ggml_compute_forward_acc + +static void ggml_compute_forward_acc_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0)); + + GGML_ASSERT(opt0->type == GGML_TYPE_I32); + GGML_ASSERT(ggml_nelements(opt0) == 5); + + // view src0 and dst with these strides and data offset inbytes during acc + // nb0 is implicitely element_size because src0 and dst are contiguous + size_t nb1 = ((int32_t *) opt0->data)[0]; + size_t nb2 = ((int32_t *) opt0->data)[1]; + size_t nb3 = ((int32_t *) opt0->data)[2]; + size_t offset = ((int32_t *) opt0->data)[3]; + bool inplace = (bool) ((int32_t *) opt0->data)[4]; + + if (!inplace && (params->type == GGML_TASK_INIT)) { + // memcpy needs to be synchronized across threads to avoid race conditions. + // => do it in INIT phase + memcpy( + ((char *) dst->data), + ((char *) src0->data), + ggml_nbytes(dst)); + } + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src1); + const int nc = src1->ne[0]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + const int64_t ne12 = src1->ne[2]; + const int64_t ne13 = src1->ne[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + // src0 and dst as viewed during acc + const size_t nb0 = ggml_element_size(src0); + + const size_t nb00 = nb0; + const size_t nb01 = nb1; + const size_t nb02 = nb2; + const size_t nb03 = nb3; + + GGML_ASSERT(offset + (ne10 == 0 ? 0 : ne10-1)*nb0 + (ne11 == 0 ? 0 : ne11-1)*nb1 + (ne12 == 0 ? 0 : ne12-1)*nb2 + (ne13 == 0 ? 0 : ne13-1)*nb3 < ggml_nbytes(dst)); + GGML_ASSERT(offset + (ne10 == 0 ? 0 : ne10-1)*nb00 + (ne11 == 0 ? 0 : ne11-1)*nb01 + (ne12 == 0 ? 0 : ne12-1)*nb02 + (ne13 == 0 ? 0 : ne13-1)*nb03 < ggml_nbytes(src0)); + + GGML_ASSERT(nb10 == sizeof(float)); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 and dst are viewed with shape of src1 and offset + // => same indices + const int i3 = ir/(ne12*ne11); + const int i2 = (ir - i3*ne12*ne11)/ne11; + const int i1 = (ir - i3*ne12*ne11 - i2*ne11); + +#ifdef GGML_USE_ACCELERATE + vDSP_vadd( + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + offset), 1, + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + offset), 1, nc); +#else + ggml_vec_add_f32(nc, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + offset), + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + offset), + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); +#endif + } +} + +static void ggml_compute_forward_acc( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_acc_f32(params, src0, src1, opt0, dst); + } break; + case GGML_TYPE_F16: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_sub + +static void ggml_compute_forward_sub_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT( nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + + if (nb10 == sizeof(float)) { + for (int ir = 0; ir < nr; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + +#ifdef GGML_USE_ACCELERATE + vDSP_vsub( + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1, + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1, + ne0); +#else + ggml_vec_sub_f32(ne0, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); +#endif + // } + // } + } + } else { + // src1 is not contiguous + for (int ir = 0; ir < nr; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); + float * src0_ptr = (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + for (int i0 = 0; i0 < ne0; i0++) { + float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11 + i0*nb10); + + dst_ptr[i0] = src0_ptr[i0] - *src1_ptr; + } + } + } +} + +static void ggml_compute_forward_sub( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_sub_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_mul + +static void ggml_compute_forward_mul_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + const int ith = params->ith; + const int nth = params->nth; + +#ifdef GGML_USE_CLBLAST + if (src1->backend == GGML_BACKEND_GPU) { + if (ith == 0) { + ggml_cl_mul(src0, src1, dst); + } + return; + } +#endif + + const int64_t nr = ggml_nrows(src0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + const int64_t ne12 = src1->ne[2]; + const int64_t ne13 = src1->ne[3]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT( nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + GGML_ASSERT(ne00 == ne10); + + if (nb10 == sizeof(float)) { + for (int64_t ir = ith; ir < nr; ir += nth) { + // src0 and dst are same shape => same indices + const int64_t i03 = ir/(ne02*ne01); + const int64_t i02 = (ir - i03*ne02*ne01)/ne01; + const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); + + const int64_t i13 = i03 % ne13; + const int64_t i12 = i02 % ne12; + const int64_t i11 = i01 % ne11; + + float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); + float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); + float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); + +#ifdef GGML_USE_ACCELERATE + UNUSED(ggml_vec_mul_f32); + + vDSP_vmul( src0_ptr, 1, src1_ptr, 1, dst_ptr, 1, ne00); +#else + ggml_vec_mul_f32(ne00, dst_ptr, src0_ptr, src1_ptr); +#endif + // } + // } + } + } else { + // src1 is not contiguous + for (int64_t ir = ith; ir < nr; ir += nth) { + // src0 and dst are same shape => same indices + // src1 is broadcastable across src0 and dst in i1, i2, i3 + const int64_t i03 = ir/(ne02*ne01); + const int64_t i02 = (ir - i03*ne02*ne01)/ne01; + const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); + + const int64_t i13 = i03 % ne13; + const int64_t i12 = i02 % ne12; + const int64_t i11 = i01 % ne11; + + float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); + float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); + + for (int64_t i0 = 0; i0 < ne00; i0++) { + float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i0*nb10); + + dst_ptr[i0] = src0_ptr[i0] * (*src1_ptr); + } + } + } +} + +static void ggml_compute_forward_mul( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_mul_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_div + +static void ggml_compute_forward_div_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int nr = ggml_nrows(src0); + const int64_t ne0 = src0->ne[0]; + const int64_t ne1 = src0->ne[1]; + const int64_t ne2 = src0->ne[2]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + GGML_ASSERT( nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + + if (nb10 == sizeof(float)) { + for (int ir = 0; ir < nr; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + +#ifdef GGML_USE_ACCELERATE + vDSP_vdiv( + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1, + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1, + ne0); +#else + ggml_vec_div_f32(ne0, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), + (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); +#endif + // } + // } + } + } else { + // src1 is not contiguous + for (int ir = 0; ir < nr; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + + float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); + float * src0_ptr = (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + for (int i0 = 0; i0 < ne0; i0++) { + float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11 + i0*nb10); + + dst_ptr[i0] = src0_ptr[i0] / (*src1_ptr); + } + } + } +} + +static void ggml_compute_forward_div( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_div_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_sqr + +static void ggml_compute_forward_sqr_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert( dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + ggml_vec_sqr_f32(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + +static void ggml_compute_forward_sqr( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_sqr_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_sqrt + +static void ggml_compute_forward_sqrt_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert( dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + ggml_vec_sqrt_f32(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + +static void ggml_compute_forward_sqrt( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_sqrt_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + + +// ggml_compute_forward_log + +static void ggml_compute_forward_log_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(params->ith == 0); + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + GGML_ASSERT( dst->nb[0] == sizeof(float)); + GGML_ASSERT(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + ggml_vec_log_f32(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + +static void ggml_compute_forward_log( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_log_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_sum + +static void ggml_compute_forward_sum_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_is_scalar(dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + assert(ggml_is_scalar(dst)); + assert(src0->nb[0] == sizeof(float)); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + ggml_float sum = 0; + ggml_float row_sum = 0; + + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = 0; i01 < ne01; i01++) { + ggml_vec_sum_ggf(ne00, + &row_sum, + (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03)); + sum += row_sum; + } + } + } + ((float *) dst->data)[0] = sum; +} + +static void ggml_compute_forward_sum( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_sum_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_sum_rows + +static void ggml_compute_forward_sum_rows_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(params->ith == 0); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + GGML_ASSERT(src0->nb[0] == sizeof(float)); + GGML_ASSERT(dst->nb[0] == sizeof(float)); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + GGML_ASSERT(ne0 == 1); + GGML_ASSERT(ne1 == ne01); + GGML_ASSERT(ne2 == ne02); + GGML_ASSERT(ne3 == ne03); + + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + for (int64_t i3 = 0; i3 < ne03; i3++) { + for (int64_t i2 = 0; i2 < ne02; i2++) { + for (int64_t i1 = 0; i1 < ne01; i1++) { + float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03); + float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3); + float row_sum = 0; + ggml_vec_sum_f32(ne00, &row_sum, src_row); + dst_row[0] = row_sum; + } + } + } +} + +static void ggml_compute_forward_sum_rows( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_sum_rows_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_mean + +static void ggml_compute_forward_mean_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + assert(src0->nb[0] == sizeof(float)); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + assert(ne0 == 1); + assert(ne1 == ne01); + assert(ne2 == ne02); + assert(ne3 == ne03); + + UNUSED(ne0); + UNUSED(ne1); + UNUSED(ne2); + UNUSED(ne3); + + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = 0; i01 < ne01; i01++) { + ggml_vec_sum_f32(ne00, + (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3), + (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03)); + + *(float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3) /= (float) ne00; + } + } + } +} + +static void ggml_compute_forward_mean( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_mean_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_repeat + +static void ggml_compute_forward_repeat_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(params->ith == 0); + GGML_ASSERT(ggml_can_repeat(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + // guaranteed to be an integer due to the check in ggml_can_repeat + const int nr0 = (int)(ne0/ne00); + const int nr1 = (int)(ne1/ne01); + const int nr2 = (int)(ne2/ne02); + const int nr3 = (int)(ne3/ne03); + + // TODO: support for transposed / permuted tensors + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + + // TODO: maybe this is not optimal? + for (int i3 = 0; i3 < nr3; i3++) { + for (int k3 = 0; k3 < ne03; k3++) { + for (int i2 = 0; i2 < nr2; i2++) { + for (int k2 = 0; k2 < ne02; k2++) { + for (int i1 = 0; i1 < nr1; i1++) { + for (int k1 = 0; k1 < ne01; k1++) { + for (int i0 = 0; i0 < nr0; i0++) { + ggml_vec_cpy_f32(ne00, + (float *) ((char *) dst->data + (i3*ne03 + k3)*nb3 + (i2*ne02 + k2)*nb2 + (i1*ne01 + k1)*nb1 + (i0*ne00)*nb0), + (float *) ((char *) src0->data + ( k3)*nb03 + ( k2)*nb02 + ( k1)*nb01)); + } + } + } + } + } + } + } +} + +static void ggml_compute_forward_repeat( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_repeat_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_repeat_back + +static void ggml_compute_forward_repeat_back_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(params->ith == 0); + GGML_ASSERT(ggml_can_repeat(dst, src0)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + // guaranteed to be an integer due to the check in ggml_can_repeat + const int nr0 = (int)(ne00/ne0); + const int nr1 = (int)(ne01/ne1); + const int nr2 = (int)(ne02/ne2); + const int nr3 = (int)(ne03/ne3); + + // TODO: support for transposed / permuted tensors + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + + if (ggml_is_contiguous(dst)) { + ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0); + } else { + for (int k3 = 0; k3 < ne3; k3++) { + for (int k2 = 0; k2 < ne2; k2++) { + for (int k1 = 0; k1 < ne1; k1++) { + ggml_vec_set_f32(ne0, + (float *) ((char *) dst->data + k1*nb1 + k2*nb2 + k3*nb3), + 0); + } + } + } + } + + // TODO: maybe this is not optimal? + for (int i3 = 0; i3 < nr3; i3++) { + for (int k3 = 0; k3 < ne3; k3++) { + for (int i2 = 0; i2 < nr2; i2++) { + for (int k2 = 0; k2 < ne2; k2++) { + for (int i1 = 0; i1 < nr1; i1++) { + for (int k1 = 0; k1 < ne1; k1++) { + for (int i0 = 0; i0 < nr0; i0++) { + ggml_vec_acc_f32(ne0, + (float *) ((char *) dst->data + ( k3)*nb3 + ( k2)*nb2 + ( k1)*nb1), + (float *) ((char *) src0->data + (i3*ne3 + k3)*nb03 + (i2*ne2 + k2)*nb02 + (i1*ne1 + k1)*nb01 + (i0*ne0)*nb00)); + } + } + } + } + } + } + } +} + +static void ggml_compute_forward_repeat_back( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_repeat_back_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_abs + +static void ggml_compute_forward_abs_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert(dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + ggml_vec_abs_f32(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + +static void ggml_compute_forward_abs( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_abs_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_sgn + +static void ggml_compute_forward_sgn_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert(dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + ggml_vec_sgn_f32(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + +static void ggml_compute_forward_sgn( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_sgn_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_neg + +static void ggml_compute_forward_neg_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert(dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + ggml_vec_neg_f32(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + +static void ggml_compute_forward_neg( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_neg_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_step + +static void ggml_compute_forward_step_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert(dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + ggml_vec_step_f32(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + +static void ggml_compute_forward_step( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_step_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_relu + +static void ggml_compute_forward_relu_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert(dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + ggml_vec_relu_f32(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + +static void ggml_compute_forward_relu( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_relu_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_gelu + +static void ggml_compute_forward_gelu_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src0->ne[0]; + const int nr = ggml_nrows(src0); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_vec_gelu_f32(nc, + (float *) ((char *) dst->data + i1*( dst->nb[1])), + (float *) ((char *) src0->data + i1*(src0->nb[1]))); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + UNUSED(x); + assert(!isnan(x)); + assert(!isinf(x)); + } +#endif + } +} + +static void ggml_compute_forward_gelu( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_gelu_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } + + //printf("XXXXXXXX gelu\n"); +} + +// ggml_compute_forward_silu + +static void ggml_compute_forward_silu_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src0->ne[0]; + const int nr = ggml_nrows(src0); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_vec_silu_f32(nc, + (float *) ((char *) dst->data + i1*( dst->nb[1])), + (float *) ((char *) src0->data + i1*(src0->nb[1]))); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + UNUSED(x); + assert(!isnan(x)); + assert(!isinf(x)); + } +#endif + } +} + +static void ggml_compute_forward_silu( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_silu_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + + +// ggml_compute_forward_silu_back + +static void ggml_compute_forward_silu_back_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * grad, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(grad)); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_are_same_shape(src0, grad)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src0->ne[0]; + const int nr = ggml_nrows(src0); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_vec_silu_backward_f32(nc, + (float *) ((char *) dst->data + i1*( dst->nb[1])), + (float *) ((char *) src0->data + i1*(src0->nb[1])), + (float *) ((char *) grad->data + i1*(grad->nb[1]))); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + UNUSED(x); + assert(!isnan(x)); + assert(!isinf(x)); + } +#endif + } +} + +static void ggml_compute_forward_silu_back( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * grad, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_silu_back_f32(params, src0, grad, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_norm + +static void ggml_compute_forward_norm_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + GGML_ASSERT(src0->nb[0] == sizeof(float)); + + const int ith = params->ith; + const int nth = params->nth; + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const float eps = 1e-5f; // TODO: make this a parameter + + // TODO: optimize + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = ith; i01 < ne01; i01 += nth) { + const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); + + ggml_float sum = 0.0; + for (int64_t i00 = 0; i00 < ne00; i00++) { + sum += (ggml_float)x[i00]; + } + + float mean = sum/ne00; + + float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); + + ggml_float sum2 = 0.0; + for (int64_t i00 = 0; i00 < ne00; i00++) { + float v = x[i00] - mean; + y[i00] = v; + sum2 += (ggml_float)(v*v); + } + + float variance = sum2/ne00; + const float scale = 1.0f/sqrtf(variance + eps); + + ggml_vec_scale_f32(ne00, y, scale); + } + } + } +} + +static void ggml_compute_forward_norm( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_norm_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +static void ggml_compute_forward_rms_norm_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + GGML_ASSERT(src0->nb[0] == sizeof(float)); + + const int ith = params->ith; + const int nth = params->nth; + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const float eps = 1e-6f; // TODO: make this a parameter + + // TODO: optimize + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = ith; i01 < ne01; i01 += nth) { + const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); + + ggml_float sum = 0.0; + for (int64_t i00 = 0; i00 < ne00; i00++) { + sum += (ggml_float)(x[i00] * x[i00]); + } + + const float mean = sum/ne00; + + float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); + + memcpy(y, x, ne00 * sizeof(float)); + // for (int i00 = 0; i00 < ne00; i00++) { + // y[i00] = x[i00]; + // } + + const float scale = 1.0f/sqrtf(mean + eps); + + ggml_vec_scale_f32(ne00, y, scale); + } + } + } +} + +static void ggml_compute_forward_rms_norm( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_rms_norm_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + + +static void ggml_compute_forward_rms_norm_back_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + GGML_ASSERT(src0->nb[0] == sizeof(float)); + + const int ith = params->ith; + const int nth = params->nth; + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const float eps = 1e-6f; // TODO: make this a parameter + + // TODO: optimize + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = ith; i01 < ne01; i01 += nth) { + // src1 is same shape as src0 => same indices + const int64_t i11 = i01; + const int64_t i12 = i02; + const int64_t i13 = i03; + + const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); + const float * dz = (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13); + + ggml_float sum_xx = 0.0; + ggml_float sum_xdz = 0.0; + + for (int64_t i00 = 0; i00 < ne00; i00++) { + sum_xx += (ggml_float)(x[i00] * x[i00]); + sum_xdz += (ggml_float)(x[i00] * dz[i00]); + } + + //const float mean = (float)(sum_xx)/ne00; + const float mean_eps = (float)(sum_xx)/ne00 + eps; + const float sum_eps = (float)(sum_xx) + eps*ne00; + //const float mean_xdz = (float)(sum_xdz)/ne00; + // we could cache rms from forward pass to improve performance. + // to do this implement ggml_rms and compose ggml_rms_norm using ggml_rms. + //const float rms = sqrtf(mean_eps); + const float rrms = 1.0f / sqrtf(mean_eps); + //const float scale = -rrms/(ne00 * mean_eps); // -1/(n*rms**3) + + { + // z = rms_norm(x) + // + // rms_norm(src0) = + // scale( + // src0, + // div( + // 1, + // sqrt( + // add( + // scale( + // sum( + // sqr( + // src0)), + // (1.0/N)), + // eps)))); + + // postorder: + // ## op args grad + // 00 param src0 grad[#00] + // 01 const 1 + // 02 sqr (#00) grad[#02] + // 03 sum (#02) grad[#03] + // 04 const 1/N + // 05 scale (#03, #04) grad[#05] + // 06 const eps + // 07 add (#05, #06) grad[#07] + // 08 sqrt (#07) grad[#08] + // 09 div (#01,#08) grad[#09] + // 10 scale (#00,#09) grad[#10] + // + // backward pass, given grad[#10] + // #10: scale + // grad[#00] += scale(grad[#10],#09) + // grad[#09] += sum(mul(grad[#10],#00)) + // #09: div + // grad[#08] += neg(mul(grad[#09], div(#09,#08))) + // #08: sqrt + // grad[#07] += mul(grad[#08], div(0.5, #08)) + // #07: add + // grad[#05] += grad[#07] + // #05: scale + // grad[#03] += scale(grad[#05],#04) + // #03: sum + // grad[#02] += repeat(grad[#03], #02) + // #02: + // grad[#00] += scale(mul(#00, grad[#02]), 2.0) + // + // substitute and simplify: + // grad[#00] = scale(grad(#10), #09) + scale(mul(#00, grad[#02]), 2.0) + // grad[#02] = repeat(grad[#03], #02) + // grad[#02] = repeat(scale(grad[#05],#04), #02) + // grad[#02] = repeat(scale(grad[#07],#04), #02) + // grad[#02] = repeat(scale(mul(grad[#08], div(0.5, #08)),#04), #02) + // grad[#02] = repeat(scale(mul(neg(mul(grad[#09], div(#09,#08))), div(0.5, #08)),#04), #02) + // grad[#02] = repeat(scale(mul(neg(mul(sum(mul(grad[#10],#00)), div(#09,#08))), div(0.5, #08)),#04), #02) + // grad[#02] = repeat(-(sum(mul(grad[#10],#00)) * div(#09,#08) * div(0.5, #08) * (1/N)), #02) + // grad[#02] = repeat(-(sum(mul(grad[#10],#00)) * div(div(#01,#08),#08) * div(0.5, #08) * (1/N)), #02) + // grad[#02] = repeat(-(sum(mul(grad[#10],#00)) * div(1,#08*#08) * div(0.5, #08) * (1/N)), #02) + // grad[#02] = repeat(-(sum(mul(grad[#10],#00)) * div(1,#07) * div(0.5, #08) * (1/N)), #02) + // grad[#00] = scale(grad(#10), #09) + scale(mul(#00, grad[#02]), 2.0) + // grad[#00] = scale(grad(#10), #09) + scale(mul(#00, repeat(-(sum(mul(grad[#10],#00)) * div(1,#07) * div(0.5, #08) * (1/N)), #02)), 2.0) + // grad[#00] = scale(grad(#10), #09) + scale(scale(#00, -(sum(mul(grad[#10],#00)) * div(1,#07) * div(0.5, #08) * (1/N))), 2.0) + // grad[#00] = scale(grad(#10), #09) + scale(#00, -(sum(mul(grad[#10],#00)) * div(1,#07) * div(1,#08) * (1/N))) + // grad[#00] = scale(grad(#10), #09) + scale(#00, sum(mul(grad[#10],#00)) * div(1,#07*#08) * (-1/N)) + // grad[#00] = scale(grad(#10), #09) + scale(#00, sum(mul(grad[#10],#00)) * div(1,#07*#08) * (-1/N)) + // grad[#00] = scale(grad(#10), #09) + scale(#00, sum(mul(grad[#10],#00)) * div(1,mean_eps*rms) * (-1/N)) + // grad[#00] = scale(grad(#10), #09) + scale(#00, sum(mul(grad[#10],#00)) * div(-1,rms*N*mean_eps)) + // grad[#00] = scale(grad(#10), #09) + scale(#00, sum(mul(grad[#10],#00)) * div(-1,rms*N*(sum_xx/N+eps))) + // grad[#00] = scale(grad(#10), #09) + scale(#00, sum(mul(grad[#10],#00)) * div(-1,rms*N*sum_xx+rms*N*eps)) + // grad[#00] = scale(dz, rrms) + scale(x, sum(mul(dz,x)) * div(-1,rms*N*mean_eps)) + // grad[#00] = scale(dz, rrms) + scale(x, sum_xdz * div(-1,rms*N*mean_eps)) + // a = b*c + d*e + // a = b*c*f/f + d*e*f/f + // a = (b*c*f + d*e*f)*(1/f) + // a = (b*c*(1/c) + d*e*(1/c))*(1/(1/c)) + // a = (b + d*e/c)*c + // b = dz, c = rrms, d = x, e = sum_xdz * div(-1,rms*N*mean_eps) + // a = (dz + x*sum_xdz * div(-1,rms*N*mean_eps)/rrms)*rrms + // a = (dz + x*sum_xdz * div(-1,rms*N*mean_eps)*rms)*rrms + // a = (dz + x*sum_xdz * div(-rms,rms*N*mean_eps))*rrms + // a = (dz + x*sum_xdz * div(-1,N*mean_eps))*rrms + // a = (dz + x*div(-sum_xdz,N*mean_eps))*rrms + // a = (dz + x*div(-mean_xdz,mean_eps))*rrms + // grad[#00] = scale(dz + scale(x, div(-mean_xdz,mean_eps)),rrms) + // grad[#00] = scale(dz + scale(x, -mean_xdz/mean_eps),rrms) + // dx = scale(dz + scale(x, -mean_xdz/mean_eps),rrms) + } + // dx = scale(dz + scale(x, -mean_xdz/mean_eps),rrms) + // post-order: + // dx := x + // dx := scale(dx,-mean_xdz/mean_eps) + // dx := add(dx, dz) + // dx := scale(dx, rrms) + float * dx = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); + + ggml_vec_cpy_f32 (ne00, dx, x); + // ggml_vec_scale_f32(ne00, dx, -mean_xdz/mean_eps); + ggml_vec_scale_f32(ne00, dx, (float)(-sum_xdz)/sum_eps); + ggml_vec_acc_f32 (ne00, dx, dz); + ggml_vec_scale_f32(ne00, dx, rrms); + } + } + } +} + +static void ggml_compute_forward_rms_norm_back( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_rms_norm_back_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + + +// ggml_compute_forward_mul_mat + +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) +// helper function to determine if it is better to use BLAS or not +// for large matrices, BLAS is faster +static bool ggml_compute_forward_mul_mat_use_blas( + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + //const int64_t ne00 = src0->ne[0]; + //const int64_t ne01 = src0->ne[1]; + + const int64_t ne10 = src1->ne[0]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + + // TODO: find the optimal values for these + if (ggml_is_contiguous(src0) && + ggml_is_contiguous(src1) && + (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) { + + /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/ + return true; + } + + return false; +} +#endif + +static void ggml_compute_forward_mul_mat_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) + const int64_t ne10 = src1->ne[0]; +#endif + const int64_t ne11 = src1->ne[1]; +#ifndef NDEBUG + const int64_t ne12 = src1->ne[2]; + const int64_t ne13 = src1->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const int nb00 = src0->nb[0]; +#endif + const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + const int nb03 = src0->nb[3]; + +#ifndef NDEBUG + const int nb10 = src1->nb[0]; +#endif + const int nb11 = src1->nb[1]; + const int nb12 = src1->nb[2]; + const int nb13 = src1->nb[3]; + + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + assert(ne02 == ne12); + assert(ne03 == ne13); + assert(ne2 == ne12); + assert(ne3 == ne13); + + // we don't support permuted src0 or src1 + assert(nb00 == sizeof(float)); + assert(nb10 == sizeof(float)); + + // dst cannot be transposed or permuted + assert(nb0 == sizeof(float)); + assert(nb0 <= nb1); + assert(nb1 <= nb2); + assert(nb2 <= nb3); + + assert(ne0 == ne01); + assert(ne1 == ne11); + assert(ne2 == ne02); + assert(ne3 == ne03); + + // nb01 >= nb00 - src0 is not transposed + // compute by src0 rows + +#if defined(GGML_USE_CLBLAST) + if (ggml_cl_can_mul_mat(src0, src1, dst)) { + if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) { + ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize); + } + return; + } +#endif + +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) + if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { + if (params->ith != 0) { + return; + } + + if (params->type == GGML_TASK_INIT) { + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03); + const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13); + float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); + + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, + ne11, ne01, ne10, + 1.0f, y, ne10, + x, ne00, + 0.0f, d, ne01); + } + } + //printf("CBLAS F32 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3); + + return; + } +#endif + + if (params->type == GGML_TASK_INIT) { + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // parallelize by src0 rows using ggml_vec_dot_f32 + + // total rows in src0 + const int nr = ne01*ne02*ne03; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 indices + const int i03 = ir/(ne02*ne01); + const int i02 = (ir - i03*ne02*ne01)/ne01; + const int i01 = (ir - i03*ne02*ne01 - i02*ne01); + + for (int64_t ic = 0; ic < ne11; ++ic) { + // src1 indices + const int i13 = i03; + const int i12 = i02; + const int i11 = ic; + + // dst indices + const int i0 = i01; + const int i1 = i11; + const int i2 = i02; + const int i3 = i03; + + ggml_vec_dot_f32(ne00, + (float *) ((char *) dst->data + (i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), + (float *) ((char *) src0->data + (i01*nb01 + i02*nb02 + i03*nb03)), + (float *) ((char *) src1->data + (i11*nb11 + i12*nb12 + i13*nb13))); + } + } + + //int64_t t1 = ggml_perf_time_us(); + //static int64_t acc = 0; + //acc += t1 - t0; + //if (t1 - t0 > 10) { + // printf("\n"); + // printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03); + // printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03); + // printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13); + // printf("nb10 = %5d, nb11 = %5d, nb12 = %5d, nb13 = %5d\n", nb10, nb11, nb12, nb13); + + // printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc); + //} +} + +static void ggml_compute_forward_mul_mat_f16_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + const int64_t ne12 = src1->ne[2]; + const int64_t ne13 = src1->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + //const int64_t ne = ne0*ne1*ne2*ne3; + + const int nb00 = src0->nb[0]; + const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + const int nb03 = src0->nb[3]; + + const int nb10 = src1->nb[0]; + const int nb11 = src1->nb[1]; + const int nb12 = src1->nb[2]; + const int nb13 = src1->nb[3]; + + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + GGML_ASSERT(ne02 == ne12); + GGML_ASSERT(ne03 == ne13); + GGML_ASSERT(ne2 == ne12); + GGML_ASSERT(ne3 == ne13); + + // TODO: we don't support permuted src0 + GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb0 <= nb1); + GGML_ASSERT(nb1 <= nb2); + GGML_ASSERT(nb2 <= nb3); + + GGML_ASSERT(ne0 == ne01); + GGML_ASSERT(ne1 == ne11); + GGML_ASSERT(ne2 == ne02); + GGML_ASSERT(ne3 == ne03); + + // nb01 >= nb00 - src0 is not transposed + // compute by src0 rows + +#if defined(GGML_USE_CLBLAST) + if (ggml_cl_can_mul_mat(src0, src1, dst)) { + if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) { + ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize); + } + return; + } +#endif + +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) + if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { + GGML_ASSERT(nb10 == sizeof(float)); + + if (params->ith != 0) { + return; + } + + if (params->type == GGML_TASK_INIT) { + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + float * const wdata = params->wdata; + { + size_t id = 0; + for (int64_t i01 = 0; i01 < ne01; ++i01) { + for (int64_t i00 = 0; i00 < ne00; ++i00) { + wdata[id++] = GGML_FP16_TO_FP32(*(ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00)); + } + } + + assert(id*sizeof(float) <= params->wsize); + } + + const float * x = wdata; + const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13); + + float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); + + // zT = y * xT + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, + ne11, ne01, ne10, + 1.0f, y, ne10, + x, ne00, + 0.0f, d, ne01); + } + } + + /*printf("CBLAS F16 = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);*/ + + return; + } +#endif + + if (params->type == GGML_TASK_INIT) { + ggml_fp16_t * const wdata = params->wdata; + + size_t id = 0; + for (int64_t i13 = 0; i13 < ne13; ++i13) { + for (int64_t i12 = 0; i12 < ne12; ++i12) { + for (int64_t i11 = 0; i11 < ne11; ++i11) { + for (int64_t i10 = 0; i10 < ne10; ++i10) { + wdata[id++] = GGML_FP32_TO_FP16(*(float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10)); + } + } + } + } + + GGML_ASSERT(id*sizeof(ggml_fp16_t) <= params->wsize); + + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // fp16 -> half the size, so divide by 2 + // TODO: do not support transposed src1 + assert(nb10/2 == sizeof(ggml_fp16_t)); + + // parallelize by src0 rows using ggml_vec_dot_f16 + + // total rows in src0 + const int nr = ne01*ne02*ne03; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + ggml_fp16_t * wdata = params->wdata; + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 indices + const int i03 = ir/(ne02*ne01); + const int i02 = (ir - i03*ne02*ne01)/ne01; + const int i01 = (ir - i03*ne02*ne01 - i02*ne01); + + const int i13 = i03; + const int i12 = i02; + + const int i0 = i01; + const int i2 = i02; + const int i3 = i03; + + ggml_fp16_t * src0_row = (ggml_fp16_t *) ((char *) src0->data + (i01*nb01 + i02*nb02 + i03*nb03)); + ggml_fp16_t * src1_col = wdata + ( 0 + i12*ne11 + i13*ne12*ne11)*ne00; + + float * dst_col = (float *) ((char *) dst->data + (i0*nb0 + 0*nb1 + i2*nb2 + i3*nb3)); + + for (int64_t ic = 0; ic < ne11; ++ic) { + ggml_vec_dot_f16(ne00, &dst_col[ic*ne0], src0_row, src1_col + ic*ne00); + } + } + + //int64_t t1 = ggml_time_us(); + //static int64_t acc = 0; + //acc += t1 - t0; + //if (t1 - t0 > 10) { + // printf("\n"); + // printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03); + // printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03); + // printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13); + + // printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc); + //} +} + +static void ggml_compute_forward_mul_mat_q_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + const int64_t ne12 = src1->ne[2]; + const int64_t ne13 = src1->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const int nb00 = src0->nb[0]; + const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + const int nb03 = src0->nb[3]; + + const int nb10 = src1->nb[0]; + const int nb11 = src1->nb[1]; + const int nb12 = src1->nb[2]; + const int nb13 = src1->nb[3]; + + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + GGML_ASSERT(ne02 == ne12); + GGML_ASSERT(ne03 == ne13); + GGML_ASSERT(ne2 == ne12); + GGML_ASSERT(ne3 == ne13); + + const enum ggml_type type = src0->type; + quantize_row_q_t const quantize_row_q_dot = quantize_fns[type].quantize_row_q_dot; + vec_dot_q_t const vec_dot_q = quantize_fns[type].vec_dot_q; + enum ggml_type const vec_dot_type = quantize_fns[type].vec_dot_type; + + // we don't support permuted src0 or src1 + GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]); + GGML_ASSERT(nb10 == sizeof(float)); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb0 <= nb1); + GGML_ASSERT(nb1 <= nb2); + GGML_ASSERT(nb2 <= nb3); + + GGML_ASSERT(ne0 == ne01); + GGML_ASSERT(ne1 == ne11); + GGML_ASSERT(ne2 == ne02); + GGML_ASSERT(ne3 == ne03); + + // nb01 >= nb00 - src0 is not transposed + // compute by src0 rows + +#if defined(GGML_USE_CLBLAST) + if (ggml_cl_can_mul_mat(src0, src1, dst)) { + if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) { + ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize); + } + return; + } +#endif + +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) + if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { + if (params->ith != 0) { + return; + } + + if (params->type == GGML_TASK_INIT) { + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + float * const wdata = params->wdata; + dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q; + + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13); + + float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); + + { + size_t id = 0; + for (int64_t i01 = 0; i01 < ne01; ++i01) { + dequantize_row_q((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01, wdata + id, ne00); + id += ne00; + } + + assert(id*sizeof(float) <= params->wsize); + } + + const float * x = wdata; + + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, + ne11, ne01, ne10, + 1.0f, y, ne10, + x, ne00, + 0.0f, d, ne01); + } + } + + //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3); + + return; + } +#endif + + if (params->type == GGML_TASK_INIT) { + char * wdata = params->wdata; + const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type]; + + for (int64_t i13 = 0; i13 < ne13; ++i13) { + for (int64_t i12 = 0; i12 < ne12; ++i12) { + for (int64_t i11 = 0; i11 < ne11; ++i11) { + quantize_row_q_dot((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10); + wdata += row_size; + } + } + } + + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // parallelize by src0 rows using ggml_vec_dot_q + + // total rows in src0 + const int nr = ne01*ne02*ne03; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + void * wdata = params->wdata; + const size_t row_size = ne00*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type]; + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 indices + const int i03 = ir/(ne02*ne01); + const int i02 = (ir - i03*ne02*ne01)/ne01; + const int i01 = (ir - i03*ne02*ne01 - i02*ne01); + + const int i13 = i03; + const int i12 = i02; + + const int i0 = i01; + const int i2 = i02; + const int i3 = i03; + + void * src0_row = (void *) ((char *) src0->data + (i01*nb01 + i02*nb02 + i03*nb03)); + char * src1_col = ((char *) wdata + ( (0 + i12*ne11 + i13*ne12*ne11)*row_size)); + + float * dst_col = (float *) ((char *) dst->data + (i0*nb0 + 0*nb1 + i2*nb2 + i3*nb3)); + + assert(ne00 % 32 == 0); + + for (int64_t ic = 0; ic < ne11; ++ic) { + vec_dot_q(ne00, &dst_col[ic*ne0], src0_row, (void *) (src1_col + ic*row_size)); + } + } + + //int64_t t1 = ggml_time_us(); + //static int64_t acc = 0; + //acc += t1 - t0; + //if (t1 - t0 > 10) { + // printf("\n"); + // printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03); + // printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03); + // printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13); + + // printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc); + //} +} + +static void ggml_compute_forward_mul_mat( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + { + ggml_compute_forward_mul_mat_q_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_mul_mat_f16_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_mul_mat_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_out_prod + + +static void ggml_compute_forward_out_prod_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + const int64_t ne03 = src0->ne[3]; + + const int64_t ne10 = src1->ne[0]; + //const int64_t ne11 = src1->ne[1]; + const int64_t ne12 = src1->ne[2]; + const int64_t ne13 = src1->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const int nb00 = src0->nb[0]; + const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + const int nb03 = src0->nb[3]; + + const int nb10 = src1->nb[0]; + const int nb11 = src1->nb[1]; + const int nb12 = src1->nb[2]; + const int nb13 = src1->nb[3]; + + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + GGML_ASSERT(ne02 == ne12); + GGML_ASSERT(ne03 == ne13); + GGML_ASSERT(ne2 == ne12); + GGML_ASSERT(ne3 == ne13); + + // we don't support permuted src0 or src1 + GGML_ASSERT(nb00 == sizeof(float)); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 == sizeof(float)); + // GGML_ASSERT(nb0 <= nb1); + // GGML_ASSERT(nb1 <= nb2); + // GGML_ASSERT(nb2 <= nb3); + + GGML_ASSERT(ne0 == ne00); + GGML_ASSERT(ne1 == ne10); + GGML_ASSERT(ne2 == ne02); + GGML_ASSERT(ne3 == ne03); + + // nb01 >= nb00 - src0 is not transposed + // compute by src0 rows + + // TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod + // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST) + + if (params->type == GGML_TASK_INIT) { + ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0); + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // parallelize by last three dimensions + + // total rows in dst + const int64_t nr = ne1*ne2*ne3; + + // rows per thread + const int64_t dr = (nr + nth - 1)/nth; + + // row range for this thread + const int64_t ir0 = dr*ith; + const int64_t ir1 = MIN(ir0 + dr, nr); + + // dst[:,:,:,:] = 0 + // for i2,i3: + // for i1: + // for i01: + // for i0: + // dst[i0,i1,i2,i3] += src0[i0,i01,i2,i3] * src1[i1,i01,i2,i3] + + for (int64_t ir = ir0; ir < ir1; ++ir) { + // dst indices + const int64_t i3 = ir/(ne2*ne1); + const int64_t i2 = (ir - i3*ne2*ne1)/ne1; + const int64_t i1 = (ir - i3*ne2*ne1 - i2*ne1); + + const int64_t i02 = i2; + const int64_t i03 = i3; + + //const int64_t i10 = i1; + const int64_t i12 = i2; + const int64_t i13 = i3; + + for (int64_t i01 = 0; i01 < ne01; ++i01) { + const int64_t i11 = i01; + + float * s0 = (float *) ((char *) src0->data + ( i01*nb01 + i02*nb02 + i03*nb03)); + float * s1 = (float *) ((char *) src1->data + (i1*nb10 + i11*nb11 + i12*nb12 + i13*nb13)); + float * d = (float *) ((char *) dst->data + ( i1*nb1 + i2*nb2 + i3*nb3)); + + ggml_vec_mad_f32(ne0, d, s0, *s1); + // for (int64_t i0 = 0; i0 < ne0; ++i0) { + // d[i0] += s0[i0] * s1[i1]; + // } + } + } + + //int64_t t1 = ggml_perf_time_us(); + //static int64_t acc = 0; + //acc += t1 - t0; + //if (t1 - t0 > 10) { + // printf("\n"); + // printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03); + // printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03); + // printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13); + // printf("nb10 = %5d, nb11 = %5d, nb12 = %5d, nb13 = %5d\n", nb10, nb11, nb12, nb13); + + // printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc); + //} +} + +static void ggml_compute_forward_out_prod( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + { + GGML_ASSERT(false); // todo + // ggml_compute_forward_out_prod_q_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F16: + { + GGML_ASSERT(false); // todo + // ggml_compute_forward_out_prod_f16_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_out_prod_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_scale + +static void ggml_compute_forward_scale_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_is_scalar(src1)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // scale factor + const float v = *(float *) src1->data; + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src0->ne[0]; + const int nr = ggml_nrows(src0); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + const size_t nb01 = src0->nb[1]; + + const size_t nb1 = dst->nb[1]; + + + for (int i1 = ir0; i1 < ir1; i1++) { + if (dst->data != src0->data) { + // src0 is same shape as dst => same indices + memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float)); + } + ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), v); + } +} + +static void ggml_compute_forward_scale( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_scale_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_set + +static void ggml_compute_forward_set_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0)); + + GGML_ASSERT(opt0->type == GGML_TYPE_I32); + GGML_ASSERT(ggml_nelements(opt0) == 5); + + // view src0 and dst with these strides and data offset inbytes during set + // nb0 is implicitely element_size because src0 and dst are contiguous + size_t nb1 = ((int32_t *) opt0->data)[0]; + size_t nb2 = ((int32_t *) opt0->data)[1]; + size_t nb3 = ((int32_t *) opt0->data)[2]; + size_t offset = ((int32_t *) opt0->data)[3]; + bool inplace = (bool) ((int32_t *) opt0->data)[4]; + + if (!inplace && (params->type == GGML_TASK_INIT)) { + // memcpy needs to be synchronized across threads to avoid race conditions. + // => do it in INIT phase + memcpy( + ((char *) dst->data), + ((char *) src0->data), + ggml_nbytes(dst)); + } + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(src1); + const int nc = src1->ne[0]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + const int64_t ne12 = src1->ne[2]; + const int64_t ne13 = src1->ne[3]; + + const size_t nb10 = src1->nb[0]; + const size_t nb11 = src1->nb[1]; + const size_t nb12 = src1->nb[2]; + const size_t nb13 = src1->nb[3]; + + // src0 and dst as viewed during set + const size_t nb0 = ggml_element_size(src0); + + const int im0 = (ne10 == 0 ? 0 : ne10-1); + const int im1 = (ne11 == 0 ? 0 : ne11-1); + const int im2 = (ne12 == 0 ? 0 : ne12-1); + const int im3 = (ne13 == 0 ? 0 : ne13-1); + + GGML_ASSERT(offset + im0*nb0 + im1*nb1 + im2*nb2 + im3*nb3 < ggml_nbytes(dst)); + + GGML_ASSERT(nb10 == sizeof(float)); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int ir = ir0; ir < ir1; ++ir) { + // src0 and dst are viewed with shape of src1 and offset + // => same indices + const int i3 = ir/(ne12*ne11); + const int i2 = (ir - i3*ne12*ne11)/ne11; + const int i1 = (ir - i3*ne12*ne11 - i2*ne11); + + ggml_vec_cpy_f32(nc, + (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + offset), + (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); + } +} + +static void ggml_compute_forward_set( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_set_f32(params, src0, src1, opt0, dst); + } break; + case GGML_TYPE_F16: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_cpy + +static void ggml_compute_forward_cpy( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + ggml_compute_forward_dup(params, src0, dst); +} + +// ggml_compute_forward_cont + +static void ggml_compute_forward_cont( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + ggml_compute_forward_dup(params, src0, dst); +} + +// ggml_compute_forward_reshape + +static void ggml_compute_forward_reshape( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + // NOP + UNUSED(params); + UNUSED(src0); + UNUSED(dst); +} + +// ggml_compute_forward_view + +static void ggml_compute_forward_view( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0) { + // NOP + UNUSED(params); + UNUSED(src0); +} + +// ggml_compute_forward_permute + +static void ggml_compute_forward_permute( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0) { + // NOP + UNUSED(params); + UNUSED(src0); +} + +// ggml_compute_forward_transpose + +static void ggml_compute_forward_transpose( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0) { + // NOP + UNUSED(params); + UNUSED(src0); +} + +// ggml_compute_forward_get_rows + +static void ggml_compute_forward_get_rows_q( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int nc = src0->ne[0]; + const int nr = ggml_nelements(src1); + const enum ggml_type type = src0->type; + dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q; + + assert( dst->ne[0] == nc); + assert( dst->ne[1] == nr); + assert(src0->nb[0] == GGML_TYPE_SIZE[type]); + + for (int i = 0; i < nr; ++i) { + const int r = ((int32_t *) src1->data)[i]; + + dequantize_row_q( + (const void *) ((char *) src0->data + r*src0->nb[1]), + (float *) ((char *) dst->data + i*dst->nb[1]), nc); + } +} + +static void ggml_compute_forward_get_rows_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int nc = src0->ne[0]; + const int nr = ggml_nelements(src1); + + assert( dst->ne[0] == nc); + assert( dst->ne[1] == nr); + assert(src0->nb[0] == sizeof(ggml_fp16_t)); + + for (int i = 0; i < nr; ++i) { + const int r = ((int32_t *) src1->data)[i]; + + for (int j = 0; j < nc; ++j) { + ggml_fp16_t v = ((ggml_fp16_t *) ((char *) src0->data + r*src0->nb[1]))[j]; + ((float *) ((char *) dst->data + i*dst->nb[1]))[j] = GGML_FP16_TO_FP32(v); + } + } +} + +static void ggml_compute_forward_get_rows_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int nc = src0->ne[0]; + const int nr = ggml_nelements(src1); + + assert( dst->ne[0] == nc); + assert( dst->ne[1] == nr); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < nr; ++i) { + const int r = ((int32_t *) src1->data)[i]; + + ggml_vec_cpy_f32(nc, + (float *) ((char *) dst->data + i*dst->nb[1]), + (float *) ((char *) src0->data + r*src0->nb[1])); + } +} + +static void ggml_compute_forward_get_rows( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + { + ggml_compute_forward_get_rows_q(params, src0, src1, dst); + } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_get_rows_f16(params, src0, src1, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_get_rows_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } + + //static bool first = true; + //printf("ne0 = %d, ne1 = %d, ne2 = %d\n", dst->ne[0], dst->ne[1], dst->ne[2]); + //if (first) { + // first = false; + //} else { + // for (int k = 0; k < dst->ne[1]; ++k) { + // for (int j = 0; j < dst->ne[0]/16; ++j) { + // for (int i = 0; i < 16; ++i) { + // printf("%8.4f ", ((float *) dst->data)[k*dst->ne[0] + j*16 + i]); + // } + // printf("\n"); + // } + // printf("\n"); + // } + // printf("\n"); + // exit(0); + //} +} + +// ggml_compute_forward_get_rows_back + +static void ggml_compute_forward_get_rows_back_f32_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + GGML_ASSERT(params->ith == 0); + GGML_ASSERT(ggml_are_same_shape(opt0, dst)); + GGML_ASSERT(ggml_is_contiguous(opt0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + + ggml_compute_forward_dup_same_cont(params, opt0, dst); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int nc = src0->ne[0]; + const int nr = ggml_nelements(src1); + + GGML_ASSERT( dst->ne[0] == nc); + GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t)); + + for (int i = 0; i < nr; ++i) { + const int r = ((int32_t *) src1->data)[i]; + + for (int j = 0; j < nc; ++j) { + ggml_fp16_t v = ((ggml_fp16_t *) ((char *) src0->data + i*src0->nb[1]))[j]; + ((float *) ((char *) dst->data + r*dst->nb[1]))[j] += GGML_FP16_TO_FP32(v); + } + } +} + +static void ggml_compute_forward_get_rows_back_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + GGML_ASSERT(params->ith == 0); + GGML_ASSERT(ggml_are_same_shape(opt0, dst)); + GGML_ASSERT(ggml_is_contiguous(opt0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + + // ggml_compute_forward_dup_same_cont(params, opt0, dst); + + if (params->type == GGML_TASK_INIT) { + memset(dst->data, 0, ggml_nbytes(dst)); + } + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int nc = src0->ne[0]; + const int nr = ggml_nelements(src1); + + GGML_ASSERT( dst->ne[0] == nc); + GGML_ASSERT(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < nr; ++i) { + const int r = ((int32_t *) src1->data)[i]; + + ggml_vec_add_f32(nc, + (float *) ((char *) dst->data + r*dst->nb[1]), + (float *) ((char *) dst->data + r*dst->nb[1]), + (float *) ((char *) src0->data + i*src0->nb[1])); + } +} + + +static void ggml_compute_forward_get_rows_back( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_get_rows_back_f32_f16(params, src0, src1, opt0, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_get_rows_back_f32(params, src0, src1, opt0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } + + //static bool first = true; + //printf("ne0 = %d, ne1 = %d, ne2 = %d\n", dst->ne[0], dst->ne[1], dst->ne[2]); + //if (first) { + // first = false; + //} else { + // for (int k = 0; k < dst->ne[1]; ++k) { + // for (int j = 0; j < dst->ne[0]/16; ++j) { + // for (int i = 0; i < 16; ++i) { + // printf("%8.4f ", ((float *) dst->data)[k*dst->ne[0] + j*16 + i]); + // } + // printf("\n"); + // } + // printf("\n"); + // } + // printf("\n"); + // exit(0); + //} +} + +// ggml_compute_forward_diag + +static void ggml_compute_forward_diag_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(params->ith == 0); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // TODO: handle transposed/permuted matrices + + const int ne00 = src0->ne[0]; + const int ne01 = src0->ne[1]; + const int ne02 = src0->ne[2]; + const int ne03 = src0->ne[3]; + const int ne0 = dst->ne[0]; + const int ne1 = dst->ne[1]; + const int ne2 = dst->ne[2]; + const int ne3 = dst->ne[3]; + GGML_ASSERT(ne00 == ne0); + GGML_ASSERT(ne00 == ne1); + GGML_ASSERT(ne01 == 1); + GGML_ASSERT(ne02 == ne2); + GGML_ASSERT(ne03 == ne3); + + const int nb00 = src0->nb[0]; + //const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + const int nb03 = src0->nb[3]; + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + GGML_ASSERT(nb00 == sizeof(float)); + GGML_ASSERT(nb0 == sizeof(float)); + + for (int i3 = 0; i3 < ne3; i3++) { + for (int i2 = 0; i2 < ne2; i2++) { + for (int i1 = 0; i1 < ne1; i1++) { + float * d = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); + float * s = (float *)((char *) src0->data + i3*nb03 + i2*nb02); + for (int i0 = 0; i0 < i1; i0++) { + d[i0] = 0; + } + d[i1] = s[i1]; + for (int i0 = i1+1; i0 < ne0; i0++) { + d[i0] = 0; + } + } + } + } +} + +static void ggml_compute_forward_diag( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_diag_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_diag_mask_inf + +static void ggml_compute_forward_diag_mask_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst, + const float value) { + GGML_ASSERT(src1->type == GGML_TYPE_I32); + GGML_ASSERT(ggml_nelements(src1) == 2); + + const int ith = params->ith; + const int nth = params->nth; + + const int n_past = ((int32_t *) src1->data)[0]; + const bool inplace = (bool)((int32_t *) src1->data)[1]; + + GGML_ASSERT(n_past >= 0); + + if (!inplace && (params->type == GGML_TASK_INIT)) { + // memcpy needs to be synchronized across threads to avoid race conditions. + // => do it in INIT phase + GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0)); + GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0)); + memcpy( + ((char *) dst->data), + ((char *) src0->data), + ggml_nbytes(dst)); + } + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // TODO: handle transposed/permuted matrices + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + const int nr = src0->ne[1]; + const int nz = n/nr; + + GGML_ASSERT( dst->nb[0] == sizeof(float)); + GGML_ASSERT(src0->nb[0] == sizeof(float)); + + for (int k = 0; k < nz; k++) { + for (int j = ith; j < nr; j += nth) { + for (int i = n_past; i < nc; i++) { + if (i > n_past + j) { + *(float *)((char *) dst->data + k*dst->nb[2] + j*dst->nb[1] + i*dst->nb[0]) = value; + } + } + } + } +} + +static void ggml_compute_forward_diag_mask_inf( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_diag_mask_f32(params, src0, src1, dst, -INFINITY); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +static void ggml_compute_forward_diag_mask_zero( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_diag_mask_f32(params, src0, src1, dst, 0); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_soft_max + +static void ggml_compute_forward_soft_max_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // TODO: handle transposed/permuted matrices + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src0->ne[0]; + const int nr = ggml_nrows(src0); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float *sp = (float *)((char *) src0->data + i1*src0->nb[1]); + float *dp = (float *)((char *) dst->data + i1*dst->nb[1]); + +#ifndef NDEBUG + for (int i = 0; i < nc; ++i) { + //printf("p[%d] = %f\n", i, p[i]); + assert(!isnan(sp[i])); + } +#endif + + float max = -INFINITY; + ggml_vec_max_f32(nc, &max, sp); + + ggml_float sum = 0.0; + + uint16_t scvt; + for (int i = 0; i < nc; i++) { + if (sp[i] == -INFINITY) { + dp[i] = 0.0f; + } else { + // const float val = (sp[i] == -INFINITY) ? 0.0 : exp(sp[i] - max); + ggml_fp16_t s = GGML_FP32_TO_FP16(sp[i] - max); + memcpy(&scvt, &s, sizeof(scvt)); + const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]); + sum += (ggml_float)val; + dp[i] = val; + } + } + + assert(sum > 0.0); + + sum = 1.0/sum; + ggml_vec_scale_f32(nc, dp, sum); + +#ifndef NDEBUG + for (int i = 0; i < nc; ++i) { + assert(!isnan(dp[i])); + assert(!isinf(dp[i])); + } +#endif + } +} + +static void ggml_compute_forward_soft_max( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_soft_max_f32(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_soft_max_back + +static void ggml_compute_forward_soft_max_back_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(ggml_is_contiguous(src1)); + GGML_ASSERT(ggml_is_contiguous(dst)); + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + GGML_ASSERT(ggml_are_same_shape(src1, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // TODO: handle transposed/permuted matrices + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src0->ne[0]; + const int nr = ggml_nrows(src0); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float *dy = (float *)((char *) src0->data + i1*src0->nb[1]); + float *y = (float *)((char *) src1->data + i1*src1->nb[1]); + float *dx = (float *)((char *) dst->data + i1*dst->nb[1]); + +#ifndef NDEBUG + for (int i = 0; i < nc; ++i) { + //printf("p[%d] = %f\n", i, p[i]); + assert(!isnan(dy[i])); + assert(!isnan(y[i])); + } +#endif + // Jii = yi - yi*yi + // Jij = -yi*yj + // J = diag(y)-y.T*y + // dx = J * dy + // dxk = sum_i(Jki * dyi) + // dxk = sum_i(-yk*yi * dyi) - (-yk*yk)*dyk + (yk - yk*yk)*dyk + // dxk = sum_i(-yk*yi * dyi) + yk*dyk + // dxk = -yk * sum_i(yi * dyi) + yk*dyk + // dxk = -yk * dot(y, dy) + yk*dyk + // dxk = yk * (- dot(y, dy) + dyk) + // dxk = yk * (dyk - dot(y, dy)) + // + // post-order: + // dot_y_dy := dot(y, dy) + // dx := dy + // dx := dx - dot_y_dy + // dx := dx * y + + // linear runtime, no additional memory + float dot_y_dy = 0; + ggml_vec_dot_f32 (nc, &dot_y_dy, y, dy); + ggml_vec_cpy_f32 (nc, dx, dy); + ggml_vec_acc1_f32(nc, dx, -dot_y_dy); + ggml_vec_mul_f32 (nc, dx, dx, y); + +#ifndef NDEBUG + for (int i = 0; i < nc; ++i) { + assert(!isnan(dx[i])); + assert(!isinf(dx[i])); + } +#endif + } +} + +static void ggml_compute_forward_soft_max_back( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_soft_max_back_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_alibi + +static void ggml_compute_forward_alibi_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 3); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_head = ((int32_t *) src1->data)[1]; + const float max_bias = ((float *) src1->data)[2]; + + assert(n_past >= 0); + + const int ne0 = src0->ne[0]; // all_seq_len = n_past + ne1 + const int ne1 = src0->ne[1]; // seq_len_without_past + //const int ne2 = src0->ne[2]; // n_head -> this is k + //const int ne3 = src0->ne[3]; // 1 -> bsz + + const int n = ggml_nrows(src0); + const int ne2_ne3 = n/ne1; // ne2*ne3 + + const int nb0 = src0->nb[0]; + const int nb1 = src0->nb[1]; + const int nb2 = src0->nb[2]; + //const int nb3 = src0->nb[3]; + + assert(nb0 == sizeof(float)); + assert(ne1 + n_past == ne0); (void) n_past; + + // add alibi to src0 (KQ_scaled) + const int n_heads_log2_floor = 1 << (int) floor(log2(n_head)); + + const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor); + const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor); + + for (int i = 0; i < ne0; i++) { + for (int j = 0; j < ne1; j++) { + for (int k = 0; k < ne2_ne3; k++) { + float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2); + float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2); + + // TODO: k*nb2 or k*nb3 + + float m_k; + + if (k < n_heads_log2_floor) { + m_k = powf(m0, k + 1); + } else { + m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1); + } + + pdst[0] = (i-ne0+1) * m_k + src[0]; + + } + } + } +} + +static void ggml_compute_forward_alibi_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 3); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_head = ((int32_t *) src1->data)[1]; + const float max_bias = ((float *) src1->data)[2]; + + assert(n_past >= 0); + + const int ne0 = src0->ne[0]; // all_seq_len = n_past + ne1 + const int ne1 = src0->ne[1]; // seq_len_without_past + //const int ne2 = src0->ne[2]; // n_head -> this is k + //const int ne3 = src0->ne[3]; // 1 -> bsz + + const int n = ggml_nrows(src0); + const int ne2_ne3 = n/ne1; // ne2*ne3 + + const int nb0 = src0->nb[0]; + const int nb1 = src0->nb[1]; + const int nb2 = src0->nb[2]; + //const int nb3 = src0->nb[3]; + + assert(nb0 == sizeof(ggml_fp16_t)); + assert(ne1 + n_past == ne0); (void) n_past; + + // add alibi to src0 (KQ_scaled) + const int n_heads_log2_floor = 1 << (int) floor(log2(n_head)); + + const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor); + const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor); + + for (int i = 0; i < ne0; i++) { + for (int j = 0; j < ne1; j++) { + for (int k = 0; k < ne2_ne3; k++) { + ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2); + float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2); + + // TODO: k*nb2 or k*nb3 + + float m_k; + + if (k < n_heads_log2_floor) { + m_k = powf(m0, k + 1); + } else { + m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1); + } + + // we return F32 + pdst[0] = (i-ne0+1) * m_k + GGML_FP16_TO_FP32(src[0]); + } + } + } +} + +static void ggml_compute_forward_alibi( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_alibi_f16(params, src0, src1, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_alibi_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + case GGML_TYPE_Q8_K: + case GGML_TYPE_I8: + case GGML_TYPE_I16: + case GGML_TYPE_I32: + case GGML_TYPE_COUNT: + { + GGML_ASSERT(false); + } break; + } +} + + +// ggml_compute_forward_clamp + +static void ggml_compute_forward_clamp_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 2); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int min = ((float *) src1->data)[0]; + const int max = ((float *) src1->data)[1]; + + const int ith = params->ith; + const int nth = params->nth; + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + + GGML_ASSERT( nb0 == sizeof(float)); + GGML_ASSERT(nb00 == sizeof(float)); + + for (int j = ith; j < n; j += nth) { + float * dst_ptr = (float *) ((char *) dst->data + j*nb1); + float * src0_ptr = (float *) ((char *) src0->data + j*nb01); + + for (int i = 0; i < nc; i++) { + dst_ptr[i] = MAX(MIN(src0_ptr[i], max), min); + } + } +} + +static void ggml_compute_forward_clamp( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_clamp_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F16: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + case GGML_TYPE_Q8_K: + case GGML_TYPE_I8: + case GGML_TYPE_I16: + case GGML_TYPE_I32: + case GGML_TYPE_COUNT: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_rope + +static void ggml_compute_forward_rope_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(src1->type == GGML_TYPE_I32); + GGML_ASSERT(ggml_nelements(src1) == 3); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_dims = ((int32_t *) src1->data)[1]; + const int mode = ((int32_t *) src1->data)[2]; + + assert(n_past >= 0); + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3); + //printf("n_past = %d, ne2 = %d\n", n_past, ne2); + + GGML_ASSERT(nb00 == sizeof(float)); + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(dst); + + GGML_ASSERT(n_dims <= ne0); + GGML_ASSERT(n_dims % 2 == 0); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + // row index used to determine which thread to use + int ir = 0; + + const float theta_scale = powf(10000.0, -2.0f/n_dims); + + const bool is_neox = mode & 2; + + for (int64_t i3 = 0; i3 < ne3; i3++) { + for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) { + const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2); + for (int64_t i1 = 0; i1 < ne1; i1++) { + if (ir++ < ir0) continue; + if (ir > ir1) break; + + float theta = (float)p; + + if (!is_neox) { + for (int64_t i0 = 0; i0 < ne0; i0 += 2) { + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + + theta *= theta_scale; + + const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float x0 = src[0]; + const float x1 = src[1]; + + dst_data[0] = x0*cos_theta - x1*sin_theta; + dst_data[1] = x0*sin_theta + x1*cos_theta; + } + } else { + // TODO: this is probably wrong, but I can't figure it out .. + // ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt_neox/modeling_gpt_neox.py#LL251C1-L294C28 + for (int64_t ib = 0; ib < ne0/n_dims; ++ib) { + for (int64_t ic = 0; ic < n_dims; ic += 2) { + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + + theta *= theta_scale; + + const int64_t i0 = ib*n_dims + ic/2; + + const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float x0 = src[0]; + const float x1 = src[n_dims/2]; + + dst_data[0] = x0*cos_theta - x1*sin_theta; + dst_data[n_dims/2] = x0*sin_theta + x1*cos_theta; + } + } + } + } + } + } +} + +static void ggml_compute_forward_rope_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(src1->type == GGML_TYPE_I32); + GGML_ASSERT(ggml_nelements(src1) == 3); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_dims = ((int32_t *) src1->data)[1]; + const int mode = ((int32_t *) src1->data)[2]; + + assert(n_past >= 0); + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3); + //printf("n_past = %d, ne2 = %d\n", n_past, ne2); + + GGML_ASSERT(nb0 == sizeof(ggml_fp16_t)); + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(dst); + + GGML_ASSERT(n_dims <= ne0); + GGML_ASSERT(n_dims % 2 == 0); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + // row index used to determine which thread to use + int ir = 0; + + const float theta_scale = powf(10000.0, -2.0f/n_dims); + + const bool is_neox = mode & 2; + + for (int64_t i3 = 0; i3 < ne3; i3++) { + for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) { + const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2); + for (int64_t i1 = 0; i1 < ne1; i1++) { + if (ir++ < ir0) continue; + if (ir > ir1) break; + + float theta = (float)p; + + if (!is_neox) { + for (int64_t i0 = 0; i0 < ne0; i0 += 2) { + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + + theta *= theta_scale; + + const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float x0 = GGML_FP16_TO_FP32(src[0]); + const float x1 = GGML_FP16_TO_FP32(src[1]); + + dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); + dst_data[1] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); + } + } else { + // TODO: this is probably wrong, but I can't figure it out .. + // ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt_neox/modeling_gpt_neox.py#LL251C1-L294C28 + for (int64_t ib = 0; ib < ne0/n_dims; ++ib) { + for (int64_t ic = 0; ic < n_dims; ic += 2) { + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + + theta *= theta_scale; + + const int64_t i0 = ib*n_dims + ic/2; + + const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float x0 = GGML_FP16_TO_FP32(src[0]); + const float x1 = GGML_FP16_TO_FP32(src[n_dims/2]); + + dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); + dst_data[n_dims/2] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); + } + } + } + } + } + } +} + +static void ggml_compute_forward_rope( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_rope_f16(params, src0, src1, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_rope_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_rope_back + +static void ggml_compute_forward_rope_back_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 3); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // y = rope(x, src1) + // dx = rope_back(dy, src1) + // src0 is dy, src1 contains options + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_dims = ((int32_t *) src1->data)[1]; + const int mode = ((int32_t *) src1->data)[2]; + + assert(n_past >= 0); + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + + //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3); + //printf("n_past = %d, ne2 = %d\n", n_past, ne2); + + assert(nb0 == sizeof(float)); + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(dst); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + // row index used to determine which thread to use + int ir = 0; + + const float theta_scale = powf(10000.0, -2.0f/n_dims); + + const bool is_neox = mode & 2; + + for (int64_t i3 = 0; i3 < ne3; i3++) { + for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) { + const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2); + for (int64_t i1 = 0; i1 < ne1; i1++) { + if (ir++ < ir0) continue; + if (ir > ir1) break; + + float theta = (float)p; + + if (!is_neox) { + for (int64_t i0 = 0; i0 < ne0; i0 += 2) { + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + + theta *= theta_scale; + + const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float dy0 = dy[0]; + const float dy1 = dy[1]; + + dx[0] = dy0*cos_theta + dy1*sin_theta; + dx[1] = - dy0*sin_theta + dy1*cos_theta; + } + } else { + for (int64_t ib = 0; ib < ne0/n_dims; ++ib) { + for (int64_t ic = 0; ic < n_dims; ic += 2) { + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + + theta *= theta_scale; + + const int64_t i0 = ib*n_dims + ic/2; + + const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float dy0 = dy[0]; + const float dy1 = dy[n_dims/2]; + + dx[0] = dy0*cos_theta + dy1*sin_theta; + dx[n_dims/2] = - dy0*sin_theta + dy1*cos_theta; + } + } + } + } + } + } +} + +static void ggml_compute_forward_rope_back_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 3); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + // y = rope(x, src1) + // dx = rope_back(dy, src1) + // src0 is dy, src1 contains options + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_dims = ((int32_t *) src1->data)[1]; + const int mode = ((int32_t *) src1->data)[2]; + + assert(n_past >= 0); + + const size_t nb00 = src0->nb[0]; + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const size_t nb0 = dst->nb[0]; + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + + //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3); + //printf("n_past = %d, ne2 = %d\n", n_past, ne2); + + assert(nb0 == sizeof(ggml_fp16_t)); + + const int ith = params->ith; + const int nth = params->nth; + + const int nr = ggml_nrows(dst); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + // row index used to determine which thread to use + int ir = 0; + + const float theta_scale = powf(10000.0, -2.0f/n_dims); + + const bool is_neox = mode & 2; + + for (int64_t i3 = 0; i3 < ne3; i3++) { + for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) { + const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2); + for (int64_t i1 = 0; i1 < ne1; i1++) { + if (ir++ < ir0) continue; + if (ir > ir1) break; + + float theta = (float)p; + + if (!is_neox) { + for (int64_t i0 = 0; i0 < ne0; i0 += 2) { + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + + theta *= theta_scale; + + const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float dy0 = GGML_FP16_TO_FP32(dy[0]); + const float dy1 = GGML_FP16_TO_FP32(dy[1]); + + dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta); + dx[1] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta); + } + } else { + for (int64_t ib = 0; ib < ne0/n_dims; ++ib) { + for (int64_t ic = 0; ic < n_dims; ic += 2) { + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + + theta *= theta_scale; + + const int64_t i0 = ib*n_dims + ic/2; + + const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); + ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); + + const float dy0 = GGML_FP16_TO_FP32(dy[0]); + const float dy1 = GGML_FP16_TO_FP32(dy[n_dims/2]); + + dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta); + dx[n_dims/2] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta); + } + } + } + } + } + } +} + +static void ggml_compute_forward_rope_back( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_rope_back_f16(params, src0, src1, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_rope_back_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_conv_1d_1s + +static void ggml_compute_forward_conv_1d_1s_f16_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + //const int64_t ne03 = src0->ne[3]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + //const int64_t ne12 = src1->ne[2]; + //const int64_t ne13 = src1->ne[3]; + + //const int64_t ne0 = dst->ne[0]; + //const int64_t ne1 = dst->ne[1]; + //const int64_t ne2 = dst->ne[2]; + //const int64_t ne3 = dst->ne[3]; + //const int64_t ne = ne0*ne1*ne2*ne3; + + const int nb00 = src0->nb[0]; + const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + //const int nb03 = src0->nb[3]; + + const int nb10 = src1->nb[0]; + const int nb11 = src1->nb[1]; + //const int nb12 = src1->nb[2]; + //const int nb13 = src1->nb[3]; + + //const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + //const int nb2 = dst->nb[2]; + //const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const int nk = ne00; + const int nh = nk/2; + + const int ew0 = ggml_up32(ne01); + + GGML_ASSERT(ne00 % 2 == 1); // TODO: support even kernel sizes + GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nb10 == sizeof(float)); + + if (params->type == GGML_TASK_INIT) { + // TODO: fix this memset (wsize is overestimated) + memset(params->wdata, 0, params->wsize); + + // prepare kernel data (src0) + { + ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; + + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = 0; i01 < ne01; i01++) { + const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01); + ggml_fp16_t * dst_data = wdata + i02*ew0*ne00; + for (int64_t i00 = 0; i00 < ne00; i00++) { + dst_data[i00*ew0 + i01] = src[i00]; + } + } + } + } + + // prepare source data (src1) + { + ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + ne02*ew0*ne00; + + for (int64_t i11 = 0; i11 < ne11; i11++) { + const float * const src = (float *)((char *) src1->data + i11*nb11); + ggml_fp16_t * dst_data = wdata; + for (int64_t i10 = 0; i10 < ne10; i10++) { + dst_data[(i10 + nh)*ew0 + i11] = GGML_FP32_TO_FP16(src[i10]); + } + } + } + + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // total rows in dst + const int nr = ne02; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * dst_data = (float *)((char *) dst->data + i1*nb1); + for (int64_t i0 = 0; i0 < ne10; ++i0) { + dst_data[i0] = 0; + for (int k = -nh; k <= nh; k++) { + float v = 0.0f; + ggml_vec_dot_f16(ew0, &v, + (ggml_fp16_t *) params->wdata + i1*ew0*ne00 + (nh + k)*ew0, + (ggml_fp16_t *) params->wdata + ne02*ew0*ne00 + (i0 + nh + k)*ew0); + + dst_data[i0] += v; + } + } + } +} + +static void ggml_compute_forward_conv_1d_1s_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + //const int64_t ne03 = src0->ne[3]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + //const int64_t ne12 = src1->ne[2]; + //const int64_t ne13 = src1->ne[3]; + + //const int64_t ne0 = dst->ne[0]; + //const int64_t ne1 = dst->ne[1]; + //const int64_t ne2 = dst->ne[2]; + //const int64_t ne3 = dst->ne[3]; + //const int64_t ne = ne0*ne1*ne2*ne3; + + const int nb00 = src0->nb[0]; + const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + //const int nb03 = src0->nb[3]; + + const int nb10 = src1->nb[0]; + const int nb11 = src1->nb[1]; + //const int nb12 = src1->nb[2]; + //const int nb13 = src1->nb[3]; + + //const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + //const int nb2 = dst->nb[2]; + //const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const int nk = ne00; + const int nh = nk/2; + + const int ew0 = ggml_up32(ne01); + + GGML_ASSERT(ne00 % 2 == 1); // TODO: support even kernel sizes + GGML_ASSERT(nb00 == sizeof(float)); + GGML_ASSERT(nb10 == sizeof(float)); + + if (params->type == GGML_TASK_INIT) { + // TODO: fix this memset (wsize is overestimated) + memset(params->wdata, 0, params->wsize); + + // prepare kernel data (src0) + { + float * const wdata = (float *) params->wdata + 0; + + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = 0; i01 < ne01; i01++) { + const float * const src = (float *)((char *) src0->data + i02*nb02 + i01*nb01); + float * dst_data = wdata + i02*ew0*ne00; + for (int64_t i00 = 0; i00 < ne00; i00++) { + dst_data[i00*ew0 + i01] = src[i00]; + } + } + } + } + + // prepare source data (src1) + { + float * const wdata = (float *) params->wdata + ne02*ew0*ne00; + + for (int64_t i11 = 0; i11 < ne11; i11++) { + const float * const src = (float *)((char *) src1->data + i11*nb11); + float * dst_data = wdata; + for (int64_t i10 = 0; i10 < ne10; i10++) { + dst_data[(i10 + nh)*ew0 + i11] = src[i10]; + } + } + } + + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // total rows in dst + const int nr = ne02; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * dst_data = (float *)((char *) dst->data + i1*nb1); + for (int64_t i0 = 0; i0 < ne10; ++i0) { + dst_data[i0] = 0; + for (int k = -nh; k <= nh; k++) { + float v = 0.0f; + ggml_vec_dot_f32(ew0, &v, + (float *) params->wdata + i1*ew0*ne00 + (nh + k)*ew0, + (float *) params->wdata + ne02*ew0*ne00 + (i0 + nh + k)*ew0); + + dst_data[i0] += v; + } + } + } +} + +static void ggml_compute_forward_conv_1d_1s( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_conv_1d_1s_f16_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_conv_1d_1s_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_conv_1d_2s + +static void ggml_compute_forward_conv_1d_2s_f16_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + //const int64_t ne03 = src0->ne[3]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + //const int64_t ne12 = src1->ne[2]; + //const int64_t ne13 = src1->ne[3]; + + //const int64_t ne0 = dst->ne[0]; + //const int64_t ne1 = dst->ne[1]; + //const int64_t ne2 = dst->ne[2]; + //const int64_t ne3 = dst->ne[3]; + //const int64_t ne = ne0*ne1*ne2*ne3; + + const int nb00 = src0->nb[0]; + const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + //const int nb03 = src0->nb[3]; + + const int nb10 = src1->nb[0]; + const int nb11 = src1->nb[1]; + //const int nb12 = src1->nb[2]; + //const int nb13 = src1->nb[3]; + + //const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + //const int nb2 = dst->nb[2]; + //const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const int nk = ne00; + const int nh = nk/2; + + const int ew0 = ggml_up32(ne01); + + GGML_ASSERT(ne00 % 2 == 1); // TODO: support even kernel sizes + GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nb10 == sizeof(float)); + + if (params->type == GGML_TASK_INIT) { + // TODO: fix this memset (wsize is overestimated) + memset(params->wdata, 0, params->wsize); + + // prepare kernel data (src0) + { + ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; + + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = 0; i01 < ne01; i01++) { + const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01); + ggml_fp16_t * dst_data = wdata + i02*ew0*ne00; + for (int64_t i00 = 0; i00 < ne00; i00++) { + dst_data[i00*ew0 + i01] = src[i00]; + } + } + } + } + + // prepare source data (src1) + { + ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + ne02*ew0*ne00; + + for (int64_t i11 = 0; i11 < ne11; i11++) { + const float * const src = (float *)((char *) src1->data + i11*nb11); + ggml_fp16_t * dst_data = wdata; + for (int64_t i10 = 0; i10 < ne10; i10++) { + dst_data[(i10 + nh)*ew0 + i11] = GGML_FP32_TO_FP16(src[i10]); + } + } + } + + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // total rows in dst + const int nr = ne02; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * dst_data = (float *)((char *) dst->data + i1*nb1); + for (int64_t i0 = 0; i0 < ne10; i0 += 2) { + dst_data[i0/2] = 0; + for (int k = -nh; k <= nh; k++) { + float v = 0.0f; + ggml_vec_dot_f16(ew0, &v, + (ggml_fp16_t *) params->wdata + i1*ew0*ne00 + (nh + k)*ew0, + (ggml_fp16_t *) params->wdata + ne02*ew0*ne00 + (i0 + nh + k)*ew0); + + dst_data[i0/2] += v; + } + } + } +} + +static void ggml_compute_forward_conv_1d_2s_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + //const int64_t ne03 = src0->ne[3]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + //const int64_t ne12 = src1->ne[2]; + //const int64_t ne13 = src1->ne[3]; + + //const int64_t ne0 = dst->ne[0]; + //const int64_t ne1 = dst->ne[1]; + //const int64_t ne2 = dst->ne[2]; + //const int64_t ne3 = dst->ne[3]; + //const int64_t ne = ne0*ne1*ne2*ne3; + + const int nb00 = src0->nb[0]; + const int nb01 = src0->nb[1]; + const int nb02 = src0->nb[2]; + //const int nb03 = src0->nb[3]; + + const int nb10 = src1->nb[0]; + const int nb11 = src1->nb[1]; + //const int nb12 = src1->nb[2]; + //const int nb13 = src1->nb[3]; + + //const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + //const int nb2 = dst->nb[2]; + //const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const int nk = ne00; + const int nh = nk/2; + + const int ew0 = ggml_up32(ne01); + + GGML_ASSERT(ne00 % 2 == 1); // TODO: support even kernel sizes + GGML_ASSERT(nb00 == sizeof(float)); + GGML_ASSERT(nb10 == sizeof(float)); + + if (params->type == GGML_TASK_INIT) { + // TODO: fix this memset (wsize is overestimated) + memset(params->wdata, 0, params->wsize); + + // prepare kernel data (src0) + { + float * const wdata = (float *) params->wdata + 0; + + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = 0; i01 < ne01; i01++) { + const float * const src = (float *)((char *) src0->data + i02*nb02 + i01*nb01); + float * dst_data = wdata + i02*ew0*ne00; + for (int64_t i00 = 0; i00 < ne00; i00++) { + dst_data[i00*ew0 + i01] = src[i00]; + } + } + } + } + + // prepare source data (src1) + { + float * const wdata = (float *) params->wdata + ne02*ew0*ne00; + + for (int64_t i11 = 0; i11 < ne11; i11++) { + const float * const src = (float *)((char *) src1->data + i11*nb11); + float * dst_data = wdata; + for (int64_t i10 = 0; i10 < ne10; i10++) { + dst_data[(i10 + nh)*ew0 + i11] = src[i10]; + } + } + } + + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // total rows in dst + const int nr = ne02; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * dst_data = (float *)((char *) dst->data + i1*nb1); + for (int64_t i0 = 0; i0 < ne10; i0 += 2) { + dst_data[i0/2] = 0; + for (int k = -nh; k <= nh; k++) { + float v = 0.0f; + ggml_vec_dot_f32(ew0, &v, + (float *) params->wdata + i1*ew0*ne00 + (nh + k)*ew0, + (float *) params->wdata + ne02*ew0*ne00 + (i0 + nh + k)*ew0); + + dst_data[i0/2] += v; + } + } + } +} + +static void ggml_compute_forward_conv_1d_2s( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_conv_1d_2s_f16_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_conv_1d_2s_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_flash_attn + +static void ggml_compute_forward_flash_attn_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * q, + const struct ggml_tensor * k, + const struct ggml_tensor * v, + const bool masked, + struct ggml_tensor * dst) { + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t neq0 = q->ne[0]; + const int64_t neq1 = q->ne[1]; + const int64_t neq2 = q->ne[2]; + const int64_t neq3 = q->ne[3]; + + const int64_t nek0 = k->ne[0]; + const int64_t nek1 = k->ne[1]; + //const int64_t nek2 = k->ne[2]; + //const int64_t nek3 = k->ne[3]; + + //const int64_t nev0 = v->ne[0]; + const int64_t nev1 = v->ne[1]; + //const int64_t nev2 = v->ne[2]; + //const int64_t nev3 = v->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + //const int64_t ne2 = dst->ne[2]; + //const int64_t ne3 = dst->ne[3]; + + const int nbk0 = k->nb[0]; + const int nbk1 = k->nb[1]; + const int nbk2 = k->nb[2]; + const int nbk3 = k->nb[3]; + + const int nbq0 = q->nb[0]; + const int nbq1 = q->nb[1]; + const int nbq2 = q->nb[2]; + const int nbq3 = q->nb[3]; + + const int nbv0 = v->nb[0]; + const int nbv1 = v->nb[1]; + const int nbv2 = v->nb[2]; + const int nbv3 = v->nb[3]; + + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const int64_t D = neq0; + const int64_t N = neq1; + const int64_t P = nek1 - N; + const int64_t M = P + N; + + const int Mup = ggml_up(M, GGML_SOFT_MAX_UNROLL); + + GGML_ASSERT(ne0 == D); + GGML_ASSERT(ne1 == N); + GGML_ASSERT(P >= 0); + + GGML_ASSERT(nbq0 == sizeof(float)); + GGML_ASSERT(nbk0 == sizeof(float)); + GGML_ASSERT(nbv0 == sizeof(float)); + + GGML_ASSERT(neq0 == D); + GGML_ASSERT(nek0 == D); + GGML_ASSERT(nev1 == D); + + GGML_ASSERT(neq1 == N); + GGML_ASSERT(nek1 == N + P); + GGML_ASSERT(nev1 == D); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb0 <= nb1); + GGML_ASSERT(nb1 <= nb2); + GGML_ASSERT(nb2 <= nb3); + + if (params->type == GGML_TASK_INIT) { + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // parallelize by q rows using ggml_vec_dot_f32 + + // total rows in q + const int nr = neq1*neq2*neq3; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + const float scale = 1.0f/sqrtf(D); + + //printf("P=%d N=%d D=%d ir0=%d ir1=%d scale = %f\n", P, N, D, ir0, ir1, scale); + + for (int ir = ir0; ir < ir1; ++ir) { + // q indices + const int iq3 = ir/(neq2*neq1); + const int iq2 = (ir - iq3*neq2*neq1)/neq1; + const int iq1 = (ir - iq3*neq2*neq1 - iq2*neq1); + + float * S = (float *) params->wdata + ith*(Mup + CACHE_LINE_SIZE_F32); + + for (int i = M; i < Mup; ++i) { + S[i] = -INFINITY; + } + + for (int64_t ic = 0; ic < nek1; ++ic) { + // k indices + const int ik3 = iq3; + const int ik2 = iq2; + const int ik1 = ic; + + // S indices + const int i1 = ik1; + + ggml_vec_dot_f32(neq0, + S + i1, + (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), + (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); + } + + // scale + ggml_vec_scale_f32(nek1, S, scale); + + if (masked) { + for (int64_t i = P; i < M; i++) { + if (i > P + iq1) { + S[i] = -INFINITY; + } + } + } + + // softmax + { + float max = -INFINITY; + ggml_vec_max_f32(M, &max, S); + + ggml_float sum = 0.0; + { +#ifdef GGML_SOFT_MAX_ACCELERATE + max = -max; + vDSP_vsadd(S, 1, &max, S, 1, Mup); + vvexpf(S, S, &Mup); + ggml_vec_sum_f32(Mup, &sum, S); +#else + uint16_t scvt[GGML_SOFT_MAX_UNROLL]; + ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 }; + + for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) { + float * SS = S + i; + + for (int j = 0; j < GGML_SOFT_MAX_UNROLL; ++j) { + if (SS[j] == -INFINITY) { + SS[j] = 0.0f; + } else { + ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max); + memcpy(&scvt[j], &s, sizeof(uint16_t)); + const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]); + sump[j] += (ggml_float)val; + SS[j] = val; + } + } + } + + for (int i = 0; i < GGML_SOFT_MAX_UNROLL; i++) { + sum += sump[i]; + } +#endif + } + + assert(sum > 0.0); + + sum = 1.0/sum; + ggml_vec_scale_f32(M, S, sum); + +#ifndef NDEBUG + for (int i = 0; i < M; ++i) { + assert(!isnan(S[i])); + assert(!isinf(S[i])); + } +#endif + } + + for (int64_t ic = 0; ic < nev1; ++ic) { + // dst indices + const int i1 = iq1; + const int i2 = iq2; + const int i3 = iq3; + + ggml_vec_dot_f32(nek1, + (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), + (float *) ((char *) v->data + ( ic*nbv1 + i2*nbv2 + i3*nbv3)), + S); + } + } +} + +static void ggml_compute_forward_flash_attn_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * q, + const struct ggml_tensor * k, + const struct ggml_tensor * v, + const bool masked, + struct ggml_tensor * dst) { + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t neq0 = q->ne[0]; + const int64_t neq1 = q->ne[1]; + const int64_t neq2 = q->ne[2]; + const int64_t neq3 = q->ne[3]; + + const int64_t nek0 = k->ne[0]; + const int64_t nek1 = k->ne[1]; + //const int64_t nek2 = k->ne[2]; + //const int64_t nek3 = k->ne[3]; + + //const int64_t nev0 = v->ne[0]; + const int64_t nev1 = v->ne[1]; + //const int64_t nev2 = v->ne[2]; + //const int64_t nev3 = v->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + //const int64_t ne2 = dst->ne[2]; + //const int64_t ne3 = dst->ne[3]; + + const int nbk0 = k->nb[0]; + const int nbk1 = k->nb[1]; + const int nbk2 = k->nb[2]; + const int nbk3 = k->nb[3]; + + const int nbq0 = q->nb[0]; + const int nbq1 = q->nb[1]; + const int nbq2 = q->nb[2]; + const int nbq3 = q->nb[3]; + + const int nbv0 = v->nb[0]; + const int nbv1 = v->nb[1]; + const int nbv2 = v->nb[2]; + const int nbv3 = v->nb[3]; + + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const int64_t D = neq0; + const int64_t N = neq1; + const int64_t P = nek1 - N; + const int64_t M = P + N; + + const int Mup = ggml_up(M, GGML_SOFT_MAX_UNROLL); + + GGML_ASSERT(ne0 == D); + GGML_ASSERT(ne1 == N); + GGML_ASSERT(P >= 0); + + GGML_ASSERT(nbq0 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nbk0 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nbv0 == sizeof(ggml_fp16_t)); + + GGML_ASSERT(neq0 == D); + GGML_ASSERT(nek0 == D); + GGML_ASSERT(nev1 == D); + + GGML_ASSERT(neq1 == N); + GGML_ASSERT(nek1 == N + P); + GGML_ASSERT(nev1 == D); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb0 <= nb1); + GGML_ASSERT(nb1 <= nb2); + GGML_ASSERT(nb2 <= nb3); + + if (params->type == GGML_TASK_INIT) { + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // parallelize by q rows using ggml_vec_dot_f32 + + // total rows in q + const int nr = neq1*neq2*neq3; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + const float scale = 1.0f/sqrtf(D); + + //printf("P=%d N=%d D=%d ir0=%d ir1=%d scale = %f\n", P, N, D, ir0, ir1, scale); + + for (int ir = ir0; ir < ir1; ++ir) { + // q indices + const int iq3 = ir/(neq2*neq1); + const int iq2 = (ir - iq3*neq2*neq1)/neq1; + const int iq1 = (ir - iq3*neq2*neq1 - iq2*neq1); + + float * S = (float *) params->wdata + ith*(2*Mup + CACHE_LINE_SIZE_F32); + + for (int i = M; i < Mup; ++i) { + S[i] = -INFINITY; + } + + if (GGML_VEC_DOT_UNROLL > 2 || nek1 % GGML_VEC_DOT_UNROLL != 0) { + for (int64_t ic = 0; ic < nek1; ++ic) { + // k indices + const int ik3 = iq3; + const int ik2 = iq2; + const int ik1 = ic; + + // S indices + const int i1 = ik1; + + ggml_vec_dot_f16(neq0, + S + i1, + (ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), + (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); + } + } else { + for (int64_t ic = 0; ic < nek1; ic += GGML_VEC_DOT_UNROLL) { + // k indices + const int ik3 = iq3; + const int ik2 = iq2; + const int ik1 = ic; + + // S indices + const int i1 = ik1; + + ggml_vec_dot_f16_unroll(neq0, nbk1, + S + i1, + ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), + (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); + } + } + + // scale + ggml_vec_scale_f32(nek1, S, scale); + + if (masked) { + for (int64_t i = P; i < M; i++) { + if (i > P + iq1) { + S[i] = -INFINITY; + } + } + } + + // softmax + { + float max = -INFINITY; + ggml_vec_max_f32(M, &max, S); + + ggml_float sum = 0.0; + { +#ifdef GGML_SOFT_MAX_ACCELERATE + max = -max; + vDSP_vsadd(S, 1, &max, S, 1, Mup); + vvexpf(S, S, &Mup); + ggml_vec_sum_f32(Mup, &sum, S); +#else + uint16_t scvt[GGML_SOFT_MAX_UNROLL]; + ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 }; + + for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) { + float * SS = S + i; + + for (int j = 0; j < GGML_SOFT_MAX_UNROLL; ++j) { + if (SS[j] == -INFINITY) { + SS[j] = 0.0f; + } else { + ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max); + memcpy(&scvt[j], &s, sizeof(uint16_t)); + const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]); + sump[j] += (ggml_float)val; + SS[j] = val; + } + } + } + + for (int i = 0; i < GGML_SOFT_MAX_UNROLL; i++) { + sum += sump[i]; + } +#endif + } + + assert(sum > 0.0); + + sum = 1.0/sum; + ggml_vec_scale_f32(M, S, sum); + +#ifndef NDEBUG + for (int i = 0; i < M; ++i) { + assert(!isnan(S[i])); + assert(!isinf(S[i])); + } +#endif + } + + ggml_fp16_t * S16 = (ggml_fp16_t *) ((float *) params->wdata + ith*(2*Mup + CACHE_LINE_SIZE_F32) + Mup); + + for (int64_t i = 0; i < M; i++) { + S16[i] = GGML_FP32_TO_FP16(S[i]); + } + + if (GGML_VEC_DOT_UNROLL == 1 || (nev1 % GGML_VEC_DOT_UNROLL != 0)) { + for (int64_t ic = 0; ic < nev1; ++ic) { + // dst indices + const int i1 = iq1; + const int i2 = iq2; + const int i3 = iq3; + + ggml_vec_dot_f16(nek1, + (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), + (ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + i2*nbv2 + i3*nbv3)), + S16); + } + } else { + for (int64_t ic = 0; ic < nev1; ic += GGML_VEC_DOT_UNROLL) { + // dst indices + const int i1 = iq1; + const int i2 = iq2; + const int i3 = iq3; + + ggml_vec_dot_f16_unroll(nek1, nbv1, + (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), + ((char *) v->data + ( ic*nbv1 + i2*nbv2 + i3*nbv3)), + S16); + } + } + } +} + +static void ggml_compute_forward_flash_attn( + const struct ggml_compute_params * params, + const struct ggml_tensor * q, + const struct ggml_tensor * k, + const struct ggml_tensor * v, + const bool masked, + struct ggml_tensor * dst) { + switch (q->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_flash_attn_f16(params, q, k, v, masked, dst); + } break; + case GGML_TYPE_F32: + { + ggml_compute_forward_flash_attn_f32(params, q, k, v, masked, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_flash_ff + +static void ggml_compute_forward_flash_ff_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * a, // F16 + const struct ggml_tensor * b0, // F16 fc_w + const struct ggml_tensor * b1, // F32 fc_b + const struct ggml_tensor * c0, // F16 proj_w + const struct ggml_tensor * c1, // F32 proj_b + struct ggml_tensor * dst) { + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t nea0 = a->ne[0]; + const int64_t nea1 = a->ne[1]; + const int64_t nea2 = a->ne[2]; + const int64_t nea3 = a->ne[3]; + + const int64_t neb00 = b0->ne[0]; + const int64_t neb01 = b0->ne[1]; + //const int64_t neb02 = b0->ne[2]; + //const int64_t neb03 = b0->ne[3]; + + const int64_t neb10 = b1->ne[0]; + const int64_t neb11 = b1->ne[1]; + //const int64_t neb12 = b1->ne[2]; + //const int64_t neb13 = b1->ne[3]; + + const int64_t nec00 = c0->ne[0]; + const int64_t nec01 = c0->ne[1]; + //const int64_t nec02 = c0->ne[2]; + //const int64_t nec03 = c0->ne[3]; + + const int64_t nec10 = c1->ne[0]; + const int64_t nec11 = c1->ne[1]; + //const int64_t nec12 = c1->ne[2]; + //const int64_t nec13 = c1->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + //const int64_t ne3 = dst->ne[3]; + + const int nba0 = a->nb[0]; + const int nba1 = a->nb[1]; + const int nba2 = a->nb[2]; + const int nba3 = a->nb[3]; + + const int nbb00 = b0->nb[0]; + const int nbb01 = b0->nb[1]; + const int nbb02 = b0->nb[2]; + const int nbb03 = b0->nb[3]; + + const int nbb10 = b1->nb[0]; + //const int nbb11 = b1->nb[1]; + //const int nbb12 = b1->nb[2]; + //const int nbb13 = b1->nb[3]; + + const int nbc00 = c0->nb[0]; + const int nbc01 = c0->nb[1]; + const int nbc02 = c0->nb[2]; + const int nbc03 = c0->nb[3]; + + const int nbc10 = c1->nb[0]; + //const int nbc11 = c1->nb[1]; + //const int nbc12 = c1->nb[2]; + //const int nbc13 = c1->nb[3]; + + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const int64_t D = nea0; + //const int64_t N = nea1; + const int64_t M = neb01; + + GGML_ASSERT(ne0 == nea0); + GGML_ASSERT(ne1 == nea1); + GGML_ASSERT(ne2 == nea2); + + GGML_ASSERT(nba0 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nbb00 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nbb10 == sizeof(float)); + GGML_ASSERT(nbc00 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nbc10 == sizeof(float)); + + GGML_ASSERT(neb00 == D); + GGML_ASSERT(neb01 == M); + GGML_ASSERT(neb10 == M); + GGML_ASSERT(neb11 == 1); + + GGML_ASSERT(nec00 == M); + GGML_ASSERT(nec01 == D); + GGML_ASSERT(nec10 == D); + GGML_ASSERT(nec11 == 1); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb0 <= nb1); + GGML_ASSERT(nb1 <= nb2); + GGML_ASSERT(nb2 <= nb3); + + if (params->type == GGML_TASK_INIT) { + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // parallelize by a rows using ggml_vec_dot_f32 + + // total rows in a + const int nr = nea1*nea2*nea3; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int ir = ir0; ir < ir1; ++ir) { + // a indices + const int ia3 = ir/(nea2*nea1); + const int ia2 = (ir - ia3*nea2*nea1)/nea1; + const int ia1 = (ir - ia3*nea2*nea1 - ia2*nea1); + + float * S = (float *) params->wdata + ith*(2*M + CACHE_LINE_SIZE_F32); + + for (int64_t ic = 0; ic < neb01; ++ic) { + // b0 indices + const int ib03 = ia3; + const int ib02 = ia2; + const int ib01 = ic; + + // S indices + const int i1 = ib01; + + ggml_vec_dot_f16(nea0, + S + i1, + (ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)), + (ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3))); + } + + ggml_vec_add_f32(neb01, S, S, (float *) b1->data); + //ggml_vec_gelu_f32(neb01, S, S); + + ggml_fp16_t * S16 = (ggml_fp16_t *) ((float *) params->wdata + ith*(2*M + CACHE_LINE_SIZE_F32) + M); + + for (int64_t i = 0; i < M; i++) { + S16[i] = GGML_FP32_TO_FP16(S[i]); + } + + ggml_vec_gelu_f16(neb01, S16, S16); + + { + // dst indices + const int i1 = ia1; + const int i2 = ia2; + const int i3 = ia3; + + for (int64_t ic = 0; ic < nec01; ++ic) { + + ggml_vec_dot_f16(neb01, + (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), + (ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)), + S16); + } + + ggml_vec_add_f32(nec01, + (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3)), + (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3)), + (float *) c1->data); + } + } +} + +static void ggml_compute_forward_flash_ff( + const struct ggml_compute_params * params, + const struct ggml_tensor * a, + const struct ggml_tensor * b0, + const struct ggml_tensor * b1, + const struct ggml_tensor * c0, + const struct ggml_tensor * c1, + struct ggml_tensor * dst) { + switch (b0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_flash_ff_f16(params, a, b0, b1, c0, c1, dst); + } break; + case GGML_TYPE_F32: + { + GGML_ASSERT(false); // TODO + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_flash_attn_back + +static void ggml_compute_forward_flash_attn_back_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * q, + const struct ggml_tensor * k, + const struct ggml_tensor * v, + const struct ggml_tensor * d, + const bool masked, + struct ggml_tensor * dst) { + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + const int64_t neq0 = q->ne[0]; + const int64_t neq1 = q->ne[1]; + const int64_t neq2 = q->ne[2]; + const int64_t neq3 = q->ne[3]; + + const int64_t nek0 = k->ne[0]; + const int64_t nek1 = k->ne[1]; + //const int64_t nek2 = k->ne[2]; + //const int64_t nek3 = k->ne[3]; + + const int64_t nev0 = v->ne[0]; + const int64_t nev1 = v->ne[1]; + //const int64_t nev2 = v->ne[2]; + //const int64_t nev3 = v->ne[3]; + + const int64_t ned0 = d->ne[0]; + const int64_t ned1 = d->ne[1]; + //const int64_t ned2 = d->ne[2]; + //const int64_t ned3 = d->ne[3]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const int nbk0 = k->nb[0]; + const int nbk1 = k->nb[1]; + const int nbk2 = k->nb[2]; + const int nbk3 = k->nb[3]; + + const int nbq0 = q->nb[0]; + const int nbq1 = q->nb[1]; + const int nbq2 = q->nb[2]; + const int nbq3 = q->nb[3]; + + const int nbv0 = v->nb[0]; + const int nbv1 = v->nb[1]; + const int nbv2 = v->nb[2]; + const int nbv3 = v->nb[3]; + + const int nbd0 = d->nb[0]; + const int nbd1 = d->nb[1]; + const int nbd2 = d->nb[2]; + const int nbd3 = d->nb[3]; + + const int nb0 = dst->nb[0]; + const int nb1 = dst->nb[1]; + const int nb2 = dst->nb[2]; + const int nb3 = dst->nb[3]; + + const int ith = params->ith; + const int nth = params->nth; + + const int64_t D = neq0; + const int64_t N = neq1; + const int64_t P = nek1 - N; + const int64_t M = P + N; + + const int Mup = ggml_up(M, GGML_SOFT_MAX_UNROLL); + const int mxDM = MAX(D, Mup); + + // GGML_ASSERT(ne0 == D); + // GGML_ASSERT(ne1 == N); + GGML_ASSERT(P >= 0); + + GGML_ASSERT(nbq0 == sizeof(float)); + GGML_ASSERT(nbk0 == sizeof(float)); + GGML_ASSERT(nbv0 == sizeof(float)); + + GGML_ASSERT(neq0 == D); + GGML_ASSERT(nek0 == D); + GGML_ASSERT(nev1 == D); + GGML_ASSERT(ned0 == D); + + GGML_ASSERT(neq1 == N); + GGML_ASSERT(nek1 == N + P); + GGML_ASSERT(nev1 == D); + GGML_ASSERT(ned1 == N); + + // dst cannot be transposed or permuted + GGML_ASSERT(nb0 == sizeof(float)); + GGML_ASSERT(nb0 <= nb1); + GGML_ASSERT(nb1 <= nb2); + GGML_ASSERT(nb2 <= nb3); + + if (params->type == GGML_TASK_INIT) { + if (ith == 0) { + memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3); + } + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // parallelize by q rows using ggml_vec_dot_f32 + + // total rows in q + const int nr = neq2*neq3; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + const float scale = 1.0f/sqrtf(D); + + //printf("P=%d N=%d D=%d ir0=%d ir1=%d scale = %f\n", P, N, D, ir0, ir1, scale); + + for (int ir = ir0; ir < ir1; ++ir) { + // q indices + const int iq3 = ir/(neq2); + const int iq2 = ir - iq3*neq2; + for ( int iq1 = 0; iq1 < neq1; ++iq1) { + + + // not sure about CACHE_LINE_SIZE_F32.. + // - maybe it must not be multiplied by 2 and excluded from .. in SM 1*(..) offset? + float * S = (float *) params->wdata + ith*2*(mxDM + CACHE_LINE_SIZE_F32) + 0*(mxDM+CACHE_LINE_SIZE_F32); + float * SM = (float *) params->wdata + ith*2*(mxDM + CACHE_LINE_SIZE_F32) + 1*(mxDM+CACHE_LINE_SIZE_F32); + + for (int i = M; i < Mup; ++i) { + S[i] = -INFINITY; + } + + for (int64_t ic = 0; ic < nek1; ++ic) { + // k indices + const int ik3 = iq3; + const int ik2 = iq2; + const int ik1 = ic; + + // S indices + const int i1 = ik1; + + ggml_vec_dot_f32(neq0, + S + i1, + (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), + (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); + } + + // scale + ggml_vec_scale_f32(nek1, S, scale); + + if (masked) { + for (int64_t i = P; i < M; i++) { + if (i > P + iq1) { + S[i] = -INFINITY; + } + } + } + + // softmax + { + float max = -INFINITY; + ggml_vec_max_f32(M, &max, S); + + ggml_float sum = 0.0; + { +#ifdef GGML_SOFT_MAX_ACCELERATE + max = -max; + vDSP_vsadd(SM, 1, &max, SM, 1, Mup); + vvexpf(SM, SM, &Mup); + ggml_vec_sum_f32(Mup, &sum, SM); +#else + uint16_t scvt[GGML_SOFT_MAX_UNROLL]; + ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 }; + + for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) { + float * SR = S + i; + float * SW = SM + i; + + for (int j = 0; j < GGML_SOFT_MAX_UNROLL; ++j) { + if (SR[j] == -INFINITY) { + SW[j] = 0.0f; + } else { + ggml_fp16_t s = GGML_FP32_TO_FP16(SR[j] - max); + memcpy(&scvt[j], &s, sizeof(uint16_t)); + const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]); + sump[j] += (ggml_float)val; + SW[j] = val; + } + } + } + + for (int i = 0; i < GGML_SOFT_MAX_UNROLL; i++) { + sum += sump[i]; + } +#endif + } + + assert(sum > 0.0); + + sum = 1.0/sum; + ggml_vec_scale_f32(M, SM, sum); + + } + + // step-by-step explanation + { + // forward-process shape grads from backward process + // parallel_for iq2,iq3: + // k[:D,:M,:,:] [D,M,:,:] grad[k][:D,:M,iq2,iq3] += grad[kcur] + // q[:D,:N,:,:] [D,N,:,:] grad[q][:D,iq1,iq2,iq3] += grad[qcur] + // v[:M,:D,:,:] [M,D,:,:] grad[v][:M,:D,iq2,iq3] += grad[vcur] + // for iq1: + // kcur = k[:D,:M,iq2,iq3] [D,M,1,1] grad[kcur] = grad[S1].T @ qcur + // qcur = q[:D,iq1,iq2,iq3] [D,1,1,1] grad[qcur] = grad[S1] @ kcur + // vcur = v[:M,:D,iq2,iq3] [M,D,1,1] grad[vcur] = grad[S5].T @ S4 + // S0 = -Inf [D,1,1,1] + // ~S1[i] = dot(kcur[:D,i], qcur) + // S1 = qcur @ kcur.T [M,1,1,1] grad[S1] = grad[S2] * scale + // S2 = S1 * scale [M,1,1,1] grad[S2] = diag_mask_zero(grad[S3], P) + // S3 = diag_mask_inf(S2, P) [M,1,1,1] grad[S3] = S4 * (grad[S4] - dot(S4, grad[S4])) + // S4 = softmax(S3) [M,1,1,1] grad[S4] = grad[S5] @ vcur + // ~S5[i] = dot(vcur[:,i], S4) + // S5 = S4 @ vcur.T [D,1,1,1] grad[S5] = d[:D,iq1,iq2,iq3] + // ~dst[i,iq1,iq2,iq3] = S5[i] ^ + // dst[:D,iq1,iq2,iq3] = S5 | grad[dst[:D,iq1,iq2,iq3]] = d[:D,iq1,iq2,iq3] + // dst backward-/ grad[dst] = d + // + // output gradients with their dependencies: + // + // grad[kcur] = grad[S1].T @ qcur + // grad[S1] = diag_mask_zero(grad[S3], P) * scale + // grad[S3] = S4 * (grad[S4] - dot(S4, grad[S4])) + // grad[S4] = grad[S5] @ vcur + // grad[S4] = d[:D,iq1,iq2,iq3] @ vcur + // grad[qcur] = grad[S1] @ kcur + // grad[vcur] = grad[S5].T @ S4 + // grad[vcur] = d[:D,iq1,iq2,iq3].T @ S4 + // + // in post-order: + // + // S1 = qcur @ kcur.T + // S2 = S1 * scale + // S3 = diag_mask_inf(S2, P) + // S4 = softmax(S3) + // grad[S4] = d[:D,iq1,iq2,iq3] @ vcur + // grad[S3] = S4 * (grad[S4] - dot(S4, grad[S4])) + // grad[S1] = diag_mask_zero(grad[S3], P) * scale + // grad[qcur] = grad[S1] @ kcur + // grad[kcur] = grad[S1].T @ qcur + // grad[vcur] = d[:D,iq1,iq2,iq3].T @ S4 + // + // using less variables (SM=S4): + // + // S = diag_mask_inf(qcur @ kcur.T * scale, P) + // SM = softmax(S) + // S = d[:D,iq1,iq2,iq3] @ vcur + // dot_SM_gradSM = dot(SM, S) + // S = SM * (S - dot(SM, S)) + // S = diag_mask_zero(S, P) * scale + // + // grad[q][:D,iq1,iq2,iq3] += S @ kcur + // grad[k][:D,:M,iq2,iq3] += S.T @ qcur + // grad[v][:M,:D,iq2,iq3] += d[:D,iq1,iq2,iq3].T @ SM + } + + // S = gradSM = d[:D,iq1,iq2,iq3] @ vcur + // S = d[:D,iq1,iq2,iq3] @ vcur + // S[:M] += vcur[:M,ic] * d[ic,iq1,iq2,iq3] + ggml_vec_set_f32(M, S, 0); + for (int64_t ic = 0; ic < D; ++ic) { + // dst indices + const int i1 = iq1; + const int i2 = iq2; + const int i3 = iq3; + + ggml_vec_mad_f32(M, + S, + (float *) ((char *) v->data + ( ic*nbv1 + i2*nbv2 + i3*nbv3)), + *(float *) ((char *) d->data + (ic*nbd0 + i1*nbd1 + i2*nbd2 + i3*nbd3))); + } + + // S = SM * (S - dot(SM, S)) + float dot_SM_gradSM = 0; + ggml_vec_dot_f32 (M, &dot_SM_gradSM, SM, S); + ggml_vec_acc1_f32(M, S, -dot_SM_gradSM); + ggml_vec_mul_f32 (M, S, S, SM); + + // S = diag_mask_zero(S, P) * scale + if (masked) { + // for (int64_t i = P + iq1 + 1; i < M; i++) { + // S[i] = 0; + // } + for (int64_t i = P; i < M; i++) { + if (i > P + iq1) { + S[i] = 0; + } + } + } + ggml_vec_scale_f32(M, S, scale); + + void * grad_q = (char *) dst->data; + void * grad_k = (char *) dst->data + nb0*D*N*neq2*neq3; + void * grad_v = (char *) dst->data + nb0*D*N*neq2*neq3 + nb0*D*M*neq2*neq3; + + const size_t nbgq1 = nb0*neq0; + const size_t nbgq2 = nb0*neq0*neq1; + const size_t nbgq3 = nb0*neq0*neq1*neq2; + + const size_t nbgk1 = nb0*nek0; + const size_t nbgk2 = nb0*nek0*nek1; + const size_t nbgk3 = nb0*nek0*nek1*neq2; + + const size_t nbgv1 = nb0*nev0; + const size_t nbgv2 = nb0*nev0*nev1; + const size_t nbgv3 = nb0*nev0*nev1*neq2; + + // S shape [M,1] + // SM shape [M,1] + // kcur shape [D,M] + // qcur shape [D,1] + // vcur shape [M,D] + // + // grad[q][:D,iq1,iq2,iq3] += S @ kcur + // grad[q][:D,iq1,iq2,iq3] += shape[M,1] @ shape[D,M] + // grad[q][:D,iq1,iq2,iq3] += S[ic] * kcur[:D,ic] + // + //// grad[q][ic,iq1,iq2,iq3] += dot(kcur[:,ic],S.T) + //// grad[q][ic,iq1,iq2,iq3] += dot(k[:D,ic,iq2,iq3],S.T) + for (int64_t ic = 0; ic < M; ++ic) { + // dst indices + const int i1 = iq1; + const int i2 = iq2; + const int i3 = iq3; + + ggml_vec_mad_f32(D, + (float *) ((char *) grad_q + (i1*nbgq1 + i2*nbgq2 + i3*nbgq3)), + (float *) ((char *) k->data + (ic*nbk1 + i2*nbk2 + i3*nbk3)), + S[ic]); + } + + // grad[k][:D,:M,iq2,iq3] += S.T @ qcur + // grad[k][:D,ic,iq2,iq3] += S.T[0,ic] * qcur[:D,0] + // grad[k][:D,ic,iq2,iq3] += S[ic] * qcur[:D,0] + for (int64_t ic = 0; ic < M; ++ic) { + // dst indices + const int i1 = iq1; + const int i2 = iq2; + const int i3 = iq3; + + // ggml_vec_set_f32(D, + // (float *) ((char *) grad_k + (ic*nbgk1 + i2*nbgk2 + i3*nbgk3)), + // 0); + ggml_vec_mad_f32(D, + (float *) ((char *) grad_k + (ic*nbgk1 + i2*nbgk2 + i3*nbgk3)), + (float *) ((char *) q->data + (i1*nbq1 + i2*nbq2 + i3*nbq3)), + S[ic]); + } + + // grad[v][:M,:D,iq2,iq3] += d[:D,iq1,iq2,iq3].T @ SM + // grad[v][:M,ic,iq2,iq3] += d[:D,iq1,iq2,iq3].T[0,ic] * SM[:M] + // grad[v][:M,ic,iq2,iq3] += d[ic,iq1,iq2,iq3] * SM[:M] + for (int64_t ic = 0; ic < D; ++ic) { + // dst indices + const int i1 = iq1; + const int i2 = iq2; + const int i3 = iq3; + + // ggml_vec_set_f32(M, + // (float *) ((char *) grad_v + ( ic*nbgv1 + i2*nbgv2 + i3*nbgv3)), + // 0); + ggml_vec_mad_f32(M, + (float *) ((char *) grad_v + ( ic*nbgv1 + i2*nbgv2 + i3*nbgv3)), + SM, + *(float *) ((char *) d->data + (ic*nbd0 + i1*nbd1 + i2*nbd2 + i3*nbd3))); + } + } + } +} + +static void ggml_compute_forward_flash_attn_back( + const struct ggml_compute_params * params, + const struct ggml_tensor * q, + const struct ggml_tensor * k, + const struct ggml_tensor * v, + const struct ggml_tensor * d, + const bool masked, + struct ggml_tensor * dst) { + switch (q->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_flash_attn_back_f32(params, q, k, v, d, masked, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_map_unary + +static void ggml_compute_forward_map_unary_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst, + const ggml_unary_op_f32_t fun) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert( dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + fun(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1]))); + } +} + + +static void ggml_compute_forward_map_unary( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst, + const ggml_unary_op_f32_t fun) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_map_unary_f32(params, src0, dst, fun); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_map_binary + +static void ggml_compute_forward_map_binary_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst, + const ggml_binary_op_f32_t fun) { + assert(params->ith == 0); + assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n = ggml_nrows(src0); + const int nc = src0->ne[0]; + + assert( dst->nb[0] == sizeof(float)); + assert(src0->nb[0] == sizeof(float)); + assert(src1->nb[0] == sizeof(float)); + + for (int i = 0; i < n; i++) { + fun(nc, + (float *) ((char *) dst->data + i*( dst->nb[1])), + (float *) ((char *) src0->data + i*(src0->nb[1])), + (float *) ((char *) src1->data + i*(src1->nb[1]))); + } +} + + +static void ggml_compute_forward_map_binary( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst, + const ggml_binary_op_f32_t fun) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_map_binary_f32(params, src0, src1, dst, fun); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_cross_entropy_loss + +static void ggml_compute_forward_cross_entropy_loss_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(ggml_is_contiguous(src1)); + GGML_ASSERT(ggml_is_scalar(dst)); + GGML_ASSERT(ggml_are_same_shape(src0, src1)); + + const int ith = params->ith; + const int nth = params->nth; + + float * sums = (float *) params->wdata; + + // TODO: handle transposed/permuted matrices + const int nc = src0->ne[0]; + const int nr = ggml_nrows(src0); + + if (params->type == GGML_TASK_INIT) { + if (ith == 0) { + memset(sums, 0, sizeof(float) * (nth + nth * nc)); + } + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + if (ith == 0) { + float * dp = (float *) dst->data; + ggml_vec_sum_f32(nth, dp, sums); + dp[0] *= -1.0f; + } + return; + } + + const double eps = 1e-9; + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * s0 = (float *)((char *) src0->data + i1*src0->nb[1]); + float * s1 = (float *)((char *) src1->data + i1*src1->nb[1]); + float * st = (float *) params->wdata + nth + ith*nc; + +#ifndef NDEBUG + for (int i = 0; i < nc; ++i) { + //printf("p[%d] = %f\n", i, p[i]); + assert(!isnan(s0[i])); + assert(!isnan(s1[i])); + } +#endif + // soft_max + ggml_float sum = 0.0; + { + float max = -INFINITY; + ggml_vec_max_f32(nc, &max, s0); + + uint16_t scvt; + for (int i = 0; i < nc; i++) { + if (s0[i] == -INFINITY) { + st[i] = 0.0f; + } else { + // const float val = (s0[i] == -INFINITY) ? 0.0 : exp(s0[i] - max); + ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max); + memcpy(&scvt, &s, sizeof(scvt)); + const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]); + sum += (ggml_float)val; + st[i] = val; + } + } + + assert(sum > 0.0); + // sum = 1.0/sum; + } + // avoid log(0) by rescaling from [0..1] to [eps..1] + sum = (1.0 - eps) / sum; + ggml_vec_scale_f32(nc, st, sum); + ggml_vec_add1_f32(nc, st, st, eps); + ggml_vec_log_f32(nc, st, st); + ggml_vec_mul_f32(nc, st, st, s1); + + ggml_vec_sum_f32(nc, sums + ith, st); + +#ifndef NDEBUG + for (int i = 0; i < nc; ++i) { + assert(!isnan(st[i])); + assert(!isinf(st[i])); + } +#endif + } + +} + +static void ggml_compute_forward_cross_entropy_loss( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_cross_entropy_loss_f32(params, src0, src1, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + +// ggml_compute_forward_cross_entropy_loss_back + +static void ggml_compute_forward_cross_entropy_loss_back_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_is_contiguous(dst)); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(ggml_is_contiguous(src1)); + GGML_ASSERT(ggml_is_contiguous(opt0)); + GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); + + const int64_t ith = params->ith; + const int64_t nth = params->nth; + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const float eps = 1e-9f; + + // TODO: handle transposed/permuted matrices + const int64_t nc = src0->ne[0]; + const int64_t nr = ggml_nrows(src0); + + // rows per thread + const int64_t dr = (nr + nth - 1)/nth; + + // row range for this thread + const int64_t ir0 = dr*ith; + const int64_t ir1 = MIN(ir0 + dr, nr); + + float * d = (float *) opt0->data; + + for (int64_t i1 = ir0; i1 < ir1; i1++) { + float * ds0 = (float *)((char *) dst->data + i1*dst->nb[1]); + float * s0 = (float *)((char *) src0->data + i1*src0->nb[1]); + float * s1 = (float *)((char *) src1->data + i1*src1->nb[1]); + float * sm = (float *) params->wdata + ith*nc; + +#ifndef NDEBUG + for (int i = 0; i < nc; ++i) { + //printf("p[%d] = %f\n", i, p[i]); + assert(!isnan(s0[i])); + assert(!isnan(s1[i])); + } +#endif + // step by step explanation: + { + //float * sums = (float *) params->wdata; + + // forward pass with annotated gradients from backward pass + // (built by going in reverse operation order, adding to gradients of current operation args) + // st0 = exp(s0-max(s0)) grad[st0] = grad[st1]*(1.0 - eps)/sum + // from softmax_back: grad[s0] = st1_k * (grad[st1]_k - dot(st1, grad[st1])) + // ggml_vec_scale_f32(nc, st, sum); // st1 = st0*/sum = softmax(s0) grad[st1] = grad[st2]*(1.0 - eps) + // ggml_vec_scale_f32(nc, st, (1.0f - eps)); // st2 = st1*(1.0 - eps) grad[st2] = grad[st3] + // ggml_vec_add1_f32(nc, st, st, eps); // st3 = st2 + eps grad[st3] = grad[st4]/st3 + // ggml_vec_log_f32(nc, st, st); // st4 = log(st3) grad[st4] = grad[st5] * s1 + // ggml_vec_mul_f32(nc, st, st, s1); // st5 = st4 * s1 grad[st5] = grad[sums[ith]] + // ggml_vec_sum_f32(nc, sums + ith, st); // sums[ith] = st5 grad[sums[ith]] = grad[cross_entropy_loss] = -grad[cel] + + // substitute into grad[st1], because we can reuse softmax_back from this point on + // grad[st1] = -grad[cel]*s1*(1.0 - eps)/(eps + softmax(s0)*(1.0 - eps)) + // postorder: + // grad[st1] := softmax(s0) + // grad[st1] := grad[st1]*(1.0 - eps) + // grad[st1] := grad[st1] + eps + // grad[st1] := s1 / grad[st1] + // grad[st1] := grad[st1]*(1.0-eps)*-grad[cel] + + // src0 gradients by going through softmax_back + // grad[s0] = st1_k * (grad[st1]_k - dot(st1, grad[st1])) + // from softmax_back: + // dxk = yk * (dyk - dot(y, dy)) + // dot_y_dy := dot(y, dy) + // dx := dy + // dx := dx - dot_y_dy + // dx := dx * y + // postorder: + // dot_st1_dst1 := dot(st1, grad[st1]) + // grad[s0] := grad[st1] + // grad[s0] := grad[s0] - dot_st1_dst1 + // grad[s0] := grad[s0] * st1 + + // prepend postorder from grad[st1] directly using grad[s0] as memory location, as we will grad[s0] := grad[st1] + // sm := softmax(s0) + // grad[s0] := sm*(1.0 - eps) + // grad[s0] := grad[s0] + eps + // grad[s0] := s1 / grad[s0] + // grad[s0] := grad[s0]*(1.0-eps)*-grad[cel] + // dot_st1_dst1 := dot(sm, grad[s0]) + // grad[s0] := grad[s0] - dot_st1_dst1 + // grad[s0] := grad[s0] * sm + } + + // soft_max + ggml_float sum = 0.0; + { + float max = -INFINITY; + ggml_vec_max_f32(nc, &max, s0); + + uint16_t scvt; + for (int i = 0; i < nc; i++) { + if (s0[i] == -INFINITY) { + sm[i] = 0.0f; + } else { + // const float val = (s0[i] == -INFINITY) ? 0.0 : exp(s0[i] - max); + ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max); + memcpy(&scvt, &s, sizeof(scvt)); + const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]); + sum += (ggml_float)val; + sm[i] = val; + } + } + + assert(sum > 0.0); + sum = 1.0/sum; + } + + float dot_st1_dst1 = 0; + ggml_vec_scale_f32(nc, sm, sum); + ggml_vec_cpy_f32 (nc, ds0, sm); + ggml_vec_scale_f32(nc, ds0, (1.0f - eps)); + ggml_vec_add1_f32 (nc, ds0, ds0, eps); + ggml_vec_div_f32 (nc, ds0, s1, ds0); + ggml_vec_scale_f32(nc, ds0, -(1.0f - eps)*d[0]); + ggml_vec_dot_f32 (nc, &dot_st1_dst1, sm, ds0); + ggml_vec_acc1_f32 (nc, ds0, -dot_st1_dst1); + ggml_vec_mul_f32 (nc, ds0, ds0, sm); + +#ifndef NDEBUG + for (int i = 0; i < nc; ++i) { + assert(!isnan(sm[i])); + assert(!isinf(sm[i])); + assert(!isnan(ds0[i])); + assert(!isinf(ds0[i])); + } +#endif + } +} + +static void ggml_compute_forward_cross_entropy_loss_back( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + const struct ggml_tensor * opt0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_cross_entropy_loss_back_f32(params, src0, src1, opt0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + + +///////////////////////////////// + +static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) { + GGML_ASSERT(params); + +#ifdef GGML_USE_CUBLAS + bool skip_cpu = ggml_cuda_compute_forward(params, tensor); + if (skip_cpu) { + return; + } + GGML_ASSERT(tensor->src0->backend == GGML_BACKEND_CPU); + GGML_ASSERT(tensor->src1 == NULL || tensor->src1->backend == GGML_BACKEND_CPU); +#endif // GGML_USE_CUBLAS + + switch (tensor->op) { + case GGML_OP_DUP: + { + ggml_compute_forward_dup(params, tensor->src0, tensor); + } break; + case GGML_OP_ADD: + { + ggml_compute_forward_add(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_ADD1: + { + ggml_compute_forward_add1(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_ACC: + { + ggml_compute_forward_acc(params, tensor->src0, tensor->src1, tensor->opt[0], tensor); + } break; + case GGML_OP_SUB: + { + ggml_compute_forward_sub(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_MUL: + { + ggml_compute_forward_mul(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_DIV: + { + ggml_compute_forward_div(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_SQR: + { + ggml_compute_forward_sqr(params, tensor->src0, tensor); + } break; + case GGML_OP_SQRT: + { + ggml_compute_forward_sqrt(params, tensor->src0, tensor); + } break; + case GGML_OP_LOG: + { + ggml_compute_forward_log(params, tensor->src0, tensor); + } break; + case GGML_OP_SUM: + { + ggml_compute_forward_sum(params, tensor->src0, tensor); + } break; + case GGML_OP_SUM_ROWS: + { + ggml_compute_forward_sum_rows(params, tensor->src0, tensor); + } break; + case GGML_OP_MEAN: + { + ggml_compute_forward_mean(params, tensor->src0, tensor); + } break; + case GGML_OP_REPEAT: + { + ggml_compute_forward_repeat(params, tensor->src0, tensor); + } break; + case GGML_OP_REPEAT_BACK: + { + ggml_compute_forward_repeat_back(params, tensor->src0, tensor); + } break; + case GGML_OP_REPEAT2: + { + ggml_compute_forward_repeat2(params, tensor->src0, tensor); + } break; + case GGML_OP_ABS: + { + ggml_compute_forward_abs(params, tensor->src0, tensor); + } break; + case GGML_OP_SGN: + { + ggml_compute_forward_sgn(params, tensor->src0, tensor); + } break; + case GGML_OP_NEG: + { + ggml_compute_forward_neg(params, tensor->src0, tensor); + } break; + case GGML_OP_STEP: + { + ggml_compute_forward_step(params, tensor->src0, tensor); + } break; + case GGML_OP_RELU: + { + ggml_compute_forward_relu(params, tensor->src0, tensor); + } break; + case GGML_OP_GELU: + { + ggml_compute_forward_gelu(params, tensor->src0, tensor); + } break; + case GGML_OP_SILU: + { + ggml_compute_forward_silu(params, tensor->src0, tensor); + } break; + case GGML_OP_SILU_BACK: + { + ggml_compute_forward_silu_back(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_NORM: + { + ggml_compute_forward_norm(params, tensor->src0, tensor); + } break; + case GGML_OP_RMS_NORM: + { + ggml_compute_forward_rms_norm(params, tensor->src0, tensor); + } break; + case GGML_OP_RMS_NORM_BACK: + { + ggml_compute_forward_rms_norm_back(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_MUL_MAT: + { + ggml_compute_forward_mul_mat(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_OUT_PROD: + { + ggml_compute_forward_out_prod(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_SCALE: + { + ggml_compute_forward_scale(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_SET: + { + ggml_compute_forward_set(params, tensor->src0, tensor->src1, tensor->opt[0], tensor); + } break; + case GGML_OP_CPY: + { + ggml_compute_forward_cpy(params, tensor->src0, tensor); + } break; + case GGML_OP_CONT: + { + ggml_compute_forward_cont(params, tensor->src0, tensor); + } break; + case GGML_OP_RESHAPE: + { + ggml_compute_forward_reshape(params, tensor->src0, tensor); + } break; + case GGML_OP_VIEW: + { + ggml_compute_forward_view(params, tensor->src0); + } break; + case GGML_OP_PERMUTE: + { + ggml_compute_forward_permute(params, tensor->src0); + } break; + case GGML_OP_TRANSPOSE: + { + ggml_compute_forward_transpose(params, tensor->src0); + } break; + case GGML_OP_GET_ROWS: + { + ggml_compute_forward_get_rows(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_GET_ROWS_BACK: + { + ggml_compute_forward_get_rows_back(params, tensor->src0, tensor->src1, tensor->opt[0], tensor); + } break; + case GGML_OP_DIAG: + { + ggml_compute_forward_diag(params, tensor->src0, tensor); + } break; + case GGML_OP_DIAG_MASK_INF: + { + ggml_compute_forward_diag_mask_inf(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_DIAG_MASK_ZERO: + { + ggml_compute_forward_diag_mask_zero(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_SOFT_MAX: + { + ggml_compute_forward_soft_max(params, tensor->src0, tensor); + } break; + case GGML_OP_SOFT_MAX_BACK: + { + ggml_compute_forward_soft_max_back(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_ROPE: + { + ggml_compute_forward_rope(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_ROPE_BACK: + { + ggml_compute_forward_rope_back(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_ALIBI: + { + ggml_compute_forward_alibi(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_CLAMP: + { + ggml_compute_forward_clamp(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_CONV_1D_1S: + { + ggml_compute_forward_conv_1d_1s(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_CONV_1D_2S: + { + ggml_compute_forward_conv_1d_2s(params, tensor->src0, tensor->src1, tensor); + } break; + case GGML_OP_FLASH_ATTN: + { + int32_t t = ggml_get_i32_1d(tensor->opt[1], 0); + GGML_ASSERT(t == 0 || t == 1); + bool masked = t != 0; + ggml_compute_forward_flash_attn(params, tensor->src0, tensor->src1, tensor->opt[0], masked, tensor); + } break; + case GGML_OP_FLASH_FF: + { + ggml_compute_forward_flash_ff(params, tensor->src0, tensor->src1, tensor->opt[0], tensor->opt[1], tensor->opt[2], tensor); + } break; + case GGML_OP_FLASH_ATTN_BACK: + { + int32_t t = ggml_get_i32_1d(tensor->opt[2], 0); + GGML_ASSERT(t == 0 || t == 1); + bool masked = t != 0; + ggml_compute_forward_flash_attn_back(params, tensor->src0, tensor->src1, tensor->opt[0], tensor->opt[1], masked, tensor); + } break; + case GGML_OP_MAP_UNARY: + { + const ggml_unary_op_f32_t fun = *((ggml_unary_op_f32_t *)tensor->opt[0]->data); + ggml_compute_forward_map_unary(params, tensor->src0, tensor, fun); + } + break; + case GGML_OP_MAP_BINARY: + { + const ggml_binary_op_f32_t fun = *((ggml_binary_op_f32_t *)tensor->opt[0]->data); + ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun); + } + break; + case GGML_OP_CROSS_ENTROPY_LOSS: + { + ggml_compute_forward_cross_entropy_loss(params, tensor->src0, tensor->src1, tensor); + } + break; + case GGML_OP_CROSS_ENTROPY_LOSS_BACK: + { + ggml_compute_forward_cross_entropy_loss_back(params, tensor->src0, tensor->src1, tensor->opt[0], tensor); + } + break; + case GGML_OP_NONE: + { + // nop + } break; + case GGML_OP_COUNT: + { + GGML_ASSERT(false); + } break; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) { + struct ggml_tensor * src0 = tensor->src0; + struct ggml_tensor * src1 = tensor->src1; + + switch (tensor->op) { + case GGML_OP_DUP: + { + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); + } + } break; + case GGML_OP_ADD: + { + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); + } + if (src1->grad) { + src1->grad = ggml_add_impl(ctx, src1->grad, tensor->grad, inplace); + } + } break; + case GGML_OP_ADD1: + { + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); + } + if (src1->grad) { + src1->grad = ggml_add_impl(ctx, + src1->grad, + ggml_mean(ctx, tensor->grad), // TODO: should probably be sum instead of mean + inplace); + } + } break; + case GGML_OP_ACC: + { + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); + } + if (src1->grad) { + GGML_ASSERT(ggml_nelements(tensor->opt[0]) == 5); + GGML_ASSERT(tensor->opt[0]->type == GGML_TYPE_I32); + const size_t nb1 = (( int32_t * ) tensor->opt[0]->data)[0]; + const size_t nb2 = (( int32_t * ) tensor->opt[0]->data)[1]; + const size_t nb3 = (( int32_t * ) tensor->opt[0]->data)[2]; + const size_t offset = (( int32_t * ) tensor->opt[0]->data)[3]; + + struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx, + tensor->grad, + src1->grad->ne[0], + src1->grad->ne[1], + src1->grad->ne[2], + src1->grad->ne[3], + nb1, nb2, nb3, offset); + + src1->grad = + ggml_add_impl(ctx, + src1->grad, + ggml_reshape(ctx, + ggml_cont(ctx, tensor_grad_view), + src1->grad), + inplace); + } + } break; + case GGML_OP_SUB: + { + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); + } + if (src1->grad) { + src1->grad = ggml_sub_impl(ctx, src1->grad, tensor->grad, inplace); + } + } break; + case GGML_OP_MUL: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_mul(ctx, src1, tensor->grad), + inplace); + } + if (src1->grad) { + src1->grad = + ggml_add_impl(ctx, + src1->grad, + ggml_mul(ctx, src0, tensor->grad), + inplace); + } + } break; + case GGML_OP_DIV: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_div(ctx, tensor->grad, src1), + inplace); + } + if (src1->grad) { + src1->grad = + ggml_sub_impl(ctx, + src1->grad, + ggml_mul(ctx, + tensor->grad, + ggml_div(ctx, tensor, src1)), + inplace); + } + } break; + case GGML_OP_SQR: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_scale(ctx, + ggml_mul(ctx, src0, tensor->grad), + ggml_new_f32(ctx, 2.0f)), + inplace); + } + } break; + case GGML_OP_SQRT: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_scale(ctx, + ggml_div(ctx, + tensor->grad, + tensor), + ggml_new_f32(ctx, 0.5f)), + inplace); + } + } break; + case GGML_OP_LOG: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_div(ctx, + tensor->grad, + src0), + inplace); + } + } break; + case GGML_OP_SUM: + { + if (src0->grad) { + src0->grad = + ggml_add1_impl(ctx, + src0->grad, + tensor->grad, + inplace); + } + } break; + case GGML_OP_SUM_ROWS: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_repeat(ctx, + tensor->grad, + src0->grad), + inplace); + } + } break; + case GGML_OP_MEAN: + { + GGML_ASSERT(false); // TODO: implement + } break; + case GGML_OP_REPEAT: + { + // necessary for llama + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_repeat_back(ctx, tensor->grad, src0->grad), + inplace); + } + } break; + case GGML_OP_REPEAT_BACK: + { + if (src0->grad) { + // TODO: test this + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_repeat(ctx, tensor->grad, src0->grad), + inplace); + } + } break; + case GGML_OP_REPEAT2: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_ABS: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_mul(ctx, + ggml_sgn(ctx, src0), + tensor->grad), + inplace); + } + } break; + case GGML_OP_SGN: + { + if (src0->grad) { + // noop + } + } break; + case GGML_OP_NEG: + { + if (src0->grad) { + src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace); + } + } break; + case GGML_OP_STEP: + { + if (src0->grad) { + // noop + } + } break; + case GGML_OP_RELU: + { + if (src0->grad) { + src0->grad = ggml_sub_impl(ctx, + src0->grad, + ggml_mul(ctx, + ggml_step(ctx, src0), + tensor->grad), + inplace); + } + } break; + case GGML_OP_GELU: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_ALIBI: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_CLAMP: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_SILU: + { + // necessary for llama + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_silu_back(ctx, src0, tensor->grad), + inplace); + } + } break; + case GGML_OP_SILU_BACK: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_NORM: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_RMS_NORM: + { + // necessary for llama + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_rms_norm_back(ctx, src0, tensor->grad), + inplace); + } + } break; + case GGML_OP_RMS_NORM_BACK: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_MUL_MAT: + { + // https://cs231n.github.io/optimization-2/#staged + // # forward pass + // s0 = np.random.randn(5, 10) + // s1 = np.random.randn(10, 3) + // t = s0.dot(s1) + + // # now suppose we had the gradient on t from above in the circuit + // dt = np.random.randn(*t.shape) # same shape as t + // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix + // ds1 = t.T.dot(dt) + + // tensor.shape [m,p] + // src0.shape [n,m] + // src1.shape [n,p] + + // necessary for llama + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_out_prod(ctx, // [n,m] + src1, // [n,p] + tensor->grad), // [m,p] + inplace); + } + if (src1->grad) { + src1->grad = + ggml_add_impl(ctx, + src1->grad, + // ggml_mul_mat(ctx, // [n,p] + // ggml_cont(ctx, // [m,n] + // ggml_transpose(ctx, src0)), // [m,n] + // tensor->grad), // [m,p] + + // // when src0 is bigger than tensor->grad (this is mostly the case in llama), + // // avoid transpose of src0, rather transpose smaller tensor->grad + // // and then use ggml_out_prod + ggml_out_prod(ctx, // [n,p] + src0, // [n,m] + ggml_transpose(ctx, // [p,m] + tensor->grad)), // [m,p] + inplace); + } + } break; + case GGML_OP_OUT_PROD: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_SCALE: + { + // necessary for llama + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_scale_impl(ctx, tensor->grad, src1, false), + inplace); + } + if (src1->grad) { + src1->grad = + ggml_add_impl(ctx, + src1->grad, + ggml_sum(ctx, ggml_mul_impl(ctx, tensor->grad, src0, false)), + inplace); + } + } break; + case GGML_OP_SET: + { + GGML_ASSERT(ggml_nelements(tensor->opt[0]) == 5); + GGML_ASSERT(tensor->opt[0]->type == GGML_TYPE_I32); + const size_t nb1 = (( int32_t * ) tensor->opt[0]->data)[0]; + const size_t nb2 = (( int32_t * ) tensor->opt[0]->data)[1]; + const size_t nb3 = (( int32_t * ) tensor->opt[0]->data)[2]; + const size_t offset = (( int32_t * ) tensor->opt[0]->data)[3]; + + struct ggml_tensor * tensor_grad_view = NULL; + + if (src0->grad || src1->grad) { + GGML_ASSERT(src0->type == tensor->type); + GGML_ASSERT(tensor->grad->type == tensor->type); + GGML_ASSERT(tensor->grad->type == src1->grad->type); + + tensor_grad_view = ggml_view_4d(ctx, + tensor->grad, + src1->grad->ne[0], + src1->grad->ne[1], + src1->grad->ne[2], + src1->grad->ne[3], + nb1, nb2, nb3, offset); + } + + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_acc_impl(ctx, + tensor->grad, + ggml_neg(ctx, tensor_grad_view), + nb1, nb2, nb3, offset, false), + inplace); + } + + if (src1->grad) { + src1->grad = + ggml_add_impl(ctx, + src1->grad, + ggml_reshape(ctx, + ggml_cont(ctx, tensor_grad_view), + src1->grad), + inplace); + } + } break; + case GGML_OP_CPY: + { + // necessary for llama + // cpy overwrites value of src1 by src0 and returns view(src1) + // the overwriting is mathematically equivalent to: + // tensor = src0 * 1 + src1 * 0 + if (src0->grad) { + // dsrc0 = dtensor * 1 + src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); + } + if (src1->grad) { + // dsrc1 = dtensor * 0 -> noop + } + } break; + case GGML_OP_CONT: + { + // same as cpy + if (src0->grad) { + GGML_ASSERT(ggml_is_contiguous(src0->grad)); + GGML_ASSERT(ggml_is_contiguous(tensor->grad)); + src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); + } + } break; + case GGML_OP_RESHAPE: + { + // necessary for llama + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, src0->grad, + ggml_reshape(ctx, tensor->grad, src0->grad), + inplace); + } + } break; + case GGML_OP_VIEW: + { + // necessary for llama + if (src0->grad) { + size_t offset; + + GGML_ASSERT(sizeof(offset) <= ggml_nbytes(tensor->opt[0])); + memcpy(&offset, tensor->opt[0]->data, sizeof(offset)); + + size_t nb1 = tensor->nb[1]; + size_t nb2 = tensor->nb[2]; + size_t nb3 = tensor->nb[3]; + + if (src0->type != src0->grad->type) { + // gradient is typically F32, but src0 could be other type + size_t ng = ggml_element_size(src0->grad); + size_t n0 = ggml_element_size(src0); + GGML_ASSERT(offset % n0 == 0); + GGML_ASSERT(nb1 % n0 == 0); + GGML_ASSERT(nb2 % n0 == 0); + GGML_ASSERT(nb3 % n0 == 0); + offset = (offset / n0) * ng; + nb1 = (nb1 / n0) * ng; + nb2 = (nb2 / n0) * ng; + nb3 = (nb3 / n0) * ng; + } + + src0->grad = ggml_acc_impl(ctx, src0->grad, tensor->grad, nb1, nb2, nb3, offset, inplace); + } + } break; + case GGML_OP_PERMUTE: + { + // necessary for llama + if (src0->grad) { + int32_t * axes = (int32_t *) tensor->opt[0]->data; + int axis0 = axes[0] & 0x3; + int axis1 = axes[1] & 0x3; + int axis2 = axes[2] & 0x3; + int axis3 = axes[3] & 0x3; + int axes_backward[4] = {0,0,0,0}; + axes_backward[axis0] = 0; + axes_backward[axis1] = 1; + axes_backward[axis2] = 2; + axes_backward[axis3] = 3; + src0->grad = + ggml_add_impl(ctx, src0->grad, + ggml_permute(ctx, + tensor->grad, + axes_backward[0], + axes_backward[1], + axes_backward[2], + axes_backward[3]), + inplace); + } + } break; + case GGML_OP_TRANSPOSE: + { + // necessary for llama + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, src0->grad, + ggml_transpose(ctx, tensor->grad), + inplace); + } + } break; + case GGML_OP_GET_ROWS: + { + // necessary for llama (only for tokenizer) + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, src0->grad, + ggml_get_rows_back(ctx, tensor->grad, src1, src0->grad), + inplace); + } + if (src1->grad) { + // noop + } + } break; + case GGML_OP_GET_ROWS_BACK: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_DIAG: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_DIAG_MASK_INF: + { + // necessary for llama + if (src0->grad) { + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 2); + const int n_past = ((int32_t *) src1->data)[0]; + src0->grad = + ggml_add_impl(ctx, src0->grad, + ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false), + inplace); + } + if (src1->grad) { + // noop + } + } break; + case GGML_OP_DIAG_MASK_ZERO: + { + // necessary for llama + if (src0->grad) { + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 2); + const int n_past = ((int32_t *) src1->data)[0]; + src0->grad = + ggml_add_impl(ctx, src0->grad, + ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false), + inplace); + } + if (src1->grad) { + // noop + } + } break; + case GGML_OP_SOFT_MAX: + { + // necessary for llama + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, src0->grad, + ggml_soft_max_back(ctx, tensor->grad, tensor), + inplace); + } + + } break; + case GGML_OP_SOFT_MAX_BACK: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_ROPE: + { + // necessary for llama + if (src0->grad) { + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 3); + const int n_past = ((int32_t *) src1->data)[0]; + const int n_dims = ((int32_t *) src1->data)[1]; + const int mode = ((int32_t *) src1->data)[2]; + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_rope_back(ctx, + tensor->grad, + n_past, + n_dims, + mode), + inplace); + } + if (src1->grad) { + // noop + } + } break; + case GGML_OP_ROPE_BACK: + { + if (src0->grad) { + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 3); + const int n_past = ((int32_t *) src1->data)[0]; + const int n_dims = ((int32_t *) src1->data)[1]; + const int mode = ((int32_t *) src1->data)[2]; + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_rope(ctx, + tensor->grad, + n_past, + n_dims, + mode), + inplace); + } + if (src1->grad) { + // noop + } + } break; + case GGML_OP_CONV_1D_1S: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_CONV_1D_2S: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_OP_FLASH_ATTN: + { + struct ggml_tensor * flash_grad = NULL; + if (src0->grad || src1->grad || tensor->opt[0]->grad) { + int32_t t = ggml_get_i32_1d(tensor->opt[1], 0); + GGML_ASSERT(t == 0 || t == 1); + bool masked = t != 0; + flash_grad = + ggml_flash_attn_back(ctx, + src0, + src1, + tensor->opt[0], + tensor->grad, + masked); + } + + if (src0->grad) { + struct ggml_tensor * grad_q = NULL; + const size_t nb0 = flash_grad->nb[0]; + const size_t offset = 0; + switch(src0->n_dims) { + case 2: + { + grad_q = ggml_view_2d(ctx, + flash_grad, + src0->ne[0], + src0->ne[1], + nb0*src0->ne[0], + offset); + } break; + case 3: + { + grad_q = ggml_view_3d(ctx, + flash_grad, + src0->ne[0], + src0->ne[1], + src0->ne[2], + nb0*src0->ne[0], + nb0*src0->ne[0]*src0->ne[1], + offset); + } break; + case 4: + { + grad_q = ggml_view_4d(ctx, + flash_grad, + src0->ne[0], + src0->ne[1], + src0->ne[2], + src0->ne[3], + nb0*src0->ne[0], + nb0*src0->ne[0]*src0->ne[1], + nb0*src0->ne[0]*src0->ne[1]*src0->ne[2], + offset); + } break; + } + + src0->grad = ggml_add_impl(ctx, + src0->grad, + grad_q, + inplace); + } + + if (src1->grad) { + struct ggml_tensor * grad_k = NULL; + const size_t nb0 = flash_grad->nb[0]; + const size_t offset = nb0*src0->ne[0]*src0->ne[1]*src0->ne[2]*src0->ne[3]; + switch(src1->n_dims) { + case 2: + { + grad_k = ggml_view_2d(ctx, + flash_grad, + src1->ne[0], + src1->ne[1], + nb0*src1->ne[0], + offset); + } break; + case 3: + { + grad_k = ggml_view_3d(ctx, + flash_grad, + src1->ne[0], + src1->ne[1], + src1->ne[2], + nb0*src1->ne[0], + nb0*src1->ne[0]*src1->ne[1], + offset); + } break; + case 4: + { + grad_k = ggml_view_4d(ctx, + flash_grad, + src1->ne[0], + src1->ne[1], + src1->ne[2], + src1->ne[3], + nb0*src1->ne[0], + nb0*src1->ne[0]*src1->ne[1], + nb0*src1->ne[0]*src1->ne[1]*src1->ne[2], + offset); + } break; + } + + src1->grad = ggml_add_impl(ctx, + src1->grad, + grad_k, + inplace); + } + + struct ggml_tensor * opt0 = tensor->opt[0]; + + if (opt0->grad) { + struct ggml_tensor * grad_v = NULL; + const size_t nb0 = flash_grad->nb[0]; + const size_t offset = nb0*src0->ne[0]*src0->ne[1]*src0->ne[2]*src0->ne[3] + + nb0*src1->ne[0]*src1->ne[1]*src1->ne[2]*src1->ne[3]; + switch(opt0->n_dims) { + case 2: + { + grad_v = ggml_view_2d(ctx, + flash_grad, + opt0->ne[0], + opt0->ne[1], + nb0*opt0->ne[0], + offset); + } break; + case 3: + { + grad_v = ggml_view_3d(ctx, + flash_grad, + opt0->ne[0], + opt0->ne[1], + opt0->ne[2], + nb0*opt0->ne[0], + nb0*opt0->ne[0]*opt0->ne[1], + offset); + } break; + case 4: + { + grad_v = ggml_view_4d(ctx, + flash_grad, + opt0->ne[0], + opt0->ne[1], + opt0->ne[2], + opt0->ne[3], + nb0*opt0->ne[0], + nb0*opt0->ne[0]*opt0->ne[1], + nb0*opt0->ne[0]*opt0->ne[1]*opt0->ne[2], + offset); + } break; + } + + opt0->grad = ggml_add_impl(ctx, + opt0->grad, + grad_v, + inplace); + } + } break; + case GGML_OP_FLASH_FF: + { + GGML_ASSERT(false); // not supported + } break; + case GGML_OP_FLASH_ATTN_BACK: + { + GGML_ASSERT(false); // not supported + } break; + case GGML_OP_MAP_UNARY: + case GGML_OP_MAP_BINARY: + { + GGML_ASSERT(false); // not supported + } break; + case GGML_OP_CROSS_ENTROPY_LOSS: + { + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_cross_entropy_loss_back(ctx, + src0, + src1, + tensor->grad), + inplace); + } + } break; + case GGML_OP_CROSS_ENTROPY_LOSS_BACK: + { + GGML_ASSERT(false); // not supported + } break; + case GGML_OP_NONE: + { + // nop + } break; + case GGML_OP_COUNT: + { + GGML_ASSERT(false); + } break; + } +} + +static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) { + if (node->grad == NULL) { + // this usually happens when we generate intermediate nodes from constants in the backward pass + // it can also happen during forward pass, if the user performs computations with constants + if (node->op != GGML_OP_NONE) { + //GGML_PRINT_DEBUG("%s: warning: node %p has no grad, but op %d\n", __func__, (void *) node, node->op); + } + } + + // check if already visited + for (int i = 0; i < cgraph->n_nodes; i++) { + if (cgraph->nodes[i] == node) { + return; + } + } + + for (int i = 0; i < cgraph->n_leafs; i++) { + if (cgraph->leafs[i] == node) { + return; + } + } + + if (node->src0) { + ggml_visit_parents(cgraph, node->src0); + } + + if (node->src1) { + ggml_visit_parents(cgraph, node->src1); + } + + for (int i = 0; i < GGML_MAX_OPT; ++i) { + if (node->opt[i]) { + ggml_visit_parents(cgraph, node->opt[i]); + } + } + + if (node->op == GGML_OP_NONE && node->grad == NULL) { + // reached a leaf node, not part of the gradient graph (e.g. a constant) + GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES); + + if (strlen(node->name) == 0) { + snprintf(node->name, sizeof(node->name), "leaf_%d", cgraph->n_leafs); + } + + cgraph->leafs[cgraph->n_leafs] = node; + cgraph->n_leafs++; + } else { + GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES); + + if (strlen(node->name) == 0) { + snprintf(node->name, sizeof(node->name), "node_%d", cgraph->n_nodes); + } + + cgraph->nodes[cgraph->n_nodes] = node; + cgraph->grads[cgraph->n_nodes] = node->grad; + cgraph->n_nodes++; + } +} + +static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) { + if (!expand) { + cgraph->n_nodes = 0; + cgraph->n_leafs = 0; + } + + const int n0 = cgraph->n_nodes; + UNUSED(n0); + + ggml_visit_parents(cgraph, tensor); + + const int n_new = cgraph->n_nodes - n0; + GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new); + + if (n_new > 0) { + // the last added node should always be starting point + GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor); + } +} + +void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) { + ggml_build_forward_impl(cgraph, tensor, true); +} + +struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) { + struct ggml_cgraph result = { + /*.n_nodes =*/ 0, + /*.n_leafs =*/ 0, + /*.n_threads =*/ GGML_DEFAULT_N_THREADS, + /*.work_size =*/ 0, + /*.work =*/ NULL, + /*.nodes =*/ { NULL }, + /*.grads =*/ { NULL }, + /*.leafs =*/ { NULL }, + /*.perf_runs =*/ 0, + /*.perf_cycles =*/ 0, + /*.perf_time_us =*/ 0, + }; + + ggml_build_forward_impl(&result, tensor, false); + + return result; +} + +struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep) { + struct ggml_cgraph result = *gf; + + GGML_ASSERT(gf->n_nodes > 0); + + // if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph + if (keep) { + for (int i = 0; i < gf->n_nodes; i++) { + struct ggml_tensor * node = gf->nodes[i]; + + if (node->grad) { + node->grad = ggml_dup_tensor(ctx, node); + gf->grads[i] = node->grad; + } + } + } + + for (int i = gf->n_nodes - 1; i >= 0; i--) { + struct ggml_tensor * node = gf->nodes[i]; + + // because we detached the grad nodes from the original graph, we can afford inplace operations + if (node->grad) { + ggml_compute_backward(ctx, node, keep); + } + } + + for (int i = gf->n_nodes - 1; i >= 0; i--) { + struct ggml_tensor * node = gf->nodes[i]; + + if (node->is_param) { + GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node); + ggml_build_forward_impl(&result, node->grad, true); + } + } + + return result; +} + +// +// thread data +// +// synchronization is done via busy loops +// I tried using spin locks, but not sure how to use them correctly - the things I tried were slower than busy loops +// + +#ifdef __APPLE__ + +//#include +// +//typedef os_unfair_lock ggml_lock_t; +// +//#define ggml_lock_init(x) UNUSED(x) +//#define ggml_lock_destroy(x) UNUSED(x) +//#define ggml_lock_lock os_unfair_lock_lock +//#define ggml_lock_unlock os_unfair_lock_unlock +// +//#define GGML_LOCK_INITIALIZER OS_UNFAIR_LOCK_INIT + +typedef int ggml_lock_t; + +#define ggml_lock_init(x) UNUSED(x) +#define ggml_lock_destroy(x) UNUSED(x) +#define ggml_lock_lock(x) UNUSED(x) +#define ggml_lock_unlock(x) UNUSED(x) + +#define GGML_LOCK_INITIALIZER 0 + +typedef pthread_t ggml_thread_t; + +#define ggml_thread_create pthread_create +#define ggml_thread_join pthread_join + +#else + +//typedef pthread_spinlock_t ggml_lock_t; + +//#define ggml_lock_init(x) pthread_spin_init(x, PTHREAD_PROCESS_PRIVATE) +//#define ggml_lock_destroy pthread_spin_destroy +//#define ggml_lock_lock pthread_spin_lock +//#define ggml_lock_unlock pthread_spin_unlock + +typedef int ggml_lock_t; + +#define ggml_lock_init(x) UNUSED(x) +#define ggml_lock_destroy(x) UNUSED(x) +#if defined(__x86_64__) || (defined(_MSC_VER) && defined(_M_AMD64)) +#define ggml_lock_lock(x) _mm_pause() +#else +#define ggml_lock_lock(x) UNUSED(x) +#endif +#define ggml_lock_unlock(x) UNUSED(x) + +#define GGML_LOCK_INITIALIZER 0 + +typedef pthread_t ggml_thread_t; + +#define ggml_thread_create pthread_create +#define ggml_thread_join pthread_join + +#endif + +struct ggml_compute_state_shared { + ggml_lock_t spin; + + int n_threads; + + // synchronization primitives + atomic_int n_ready; + atomic_bool has_work; + atomic_bool stop; // stop all threads +}; + +struct ggml_compute_state { + ggml_thread_t thrd; + + struct ggml_compute_params params; + struct ggml_tensor * node; + + struct ggml_compute_state_shared * shared; +}; + +static thread_ret_t ggml_graph_compute_thread(void * data) { + struct ggml_compute_state * state = (struct ggml_compute_state *) data; + + const int n_threads = state->shared->n_threads; + + while (true) { + if (atomic_fetch_add(&state->shared->n_ready, 1) == n_threads - 1) { + atomic_store(&state->shared->has_work, false); + } else { + while (atomic_load(&state->shared->has_work)) { + if (atomic_load(&state->shared->stop)) { + return 0; + } + ggml_lock_lock (&state->shared->spin); + ggml_lock_unlock(&state->shared->spin); + } + } + + atomic_fetch_sub(&state->shared->n_ready, 1); + + // wait for work + while (!atomic_load(&state->shared->has_work)) { + if (atomic_load(&state->shared->stop)) { + return 0; + } + ggml_lock_lock (&state->shared->spin); + ggml_lock_unlock(&state->shared->spin); + } + + // check if we should stop + if (atomic_load(&state->shared->stop)) { + break; + } + + if (state->node) { + if (state->params.ith < state->params.nth) { + ggml_compute_forward(&state->params, state->node); + } + + state->node = NULL; + } else { + break; + } + } + + return 0; +} + +void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { + const int n_threads = cgraph->n_threads; + + struct ggml_compute_state_shared state_shared = { + /*.spin =*/ GGML_LOCK_INITIALIZER, + /*.n_threads =*/ n_threads, + /*.n_ready =*/ 0, + /*.has_work =*/ false, + /*.stop =*/ false, + }; + struct ggml_compute_state * workers = n_threads > 1 ? alloca(sizeof(struct ggml_compute_state)*(n_threads - 1)) : NULL; + + // create thread pool + if (n_threads > 1) { + ggml_lock_init(&state_shared.spin); + + atomic_store(&state_shared.has_work, true); + + for (int j = 0; j < n_threads - 1; j++) { + workers[j] = (struct ggml_compute_state) { + .thrd = 0, + .params = { + .type = GGML_TASK_COMPUTE, + .ith = j + 1, + .nth = n_threads, + .wsize = cgraph->work ? ggml_nbytes(cgraph->work) : 0, + .wdata = cgraph->work ? cgraph->work->data : NULL, + }, + .node = NULL, + .shared = &state_shared, + }; + + int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]); + GGML_ASSERT(rc == 0); + UNUSED(rc); + } + } + + // initialize tasks + work buffer + { + size_t work_size = 0; + + // thread scheduling for the different operations + for (int i = 0; i < cgraph->n_nodes; i++) { + struct ggml_tensor * node = cgraph->nodes[i]; + + switch (node->op) { + case GGML_OP_CPY: + case GGML_OP_DUP: + { + node->n_tasks = n_threads; + + size_t cur = 0; + if (ggml_is_quantized(node->type)) { + cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->ne[0] * n_threads; + } + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_ADD: + case GGML_OP_ADD1: + { + node->n_tasks = n_threads; + + size_t cur = 0; + + if (ggml_is_quantized(node->src0->type)) { + cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src0->ne[0] * n_threads; + } + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_ACC: + { + node->n_tasks = n_threads; + + size_t cur = 0; + + if (ggml_is_quantized(node->src0->type)) { + cur = GGML_TYPE_SIZE[GGML_TYPE_F32] * node->src1->ne[0] * n_threads; + } + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_SUB: + case GGML_OP_DIV: + case GGML_OP_SQR: + case GGML_OP_SQRT: + case GGML_OP_LOG: + case GGML_OP_SUM: + case GGML_OP_SUM_ROWS: + case GGML_OP_MEAN: + case GGML_OP_REPEAT: + case GGML_OP_REPEAT_BACK: + case GGML_OP_REPEAT2: + case GGML_OP_ABS: + case GGML_OP_SGN: + case GGML_OP_NEG: + case GGML_OP_STEP: + case GGML_OP_RELU: + { + node->n_tasks = 1; + } break; + case GGML_OP_MUL: + case GGML_OP_GELU: + case GGML_OP_SILU: + case GGML_OP_SILU_BACK: + case GGML_OP_NORM: + case GGML_OP_RMS_NORM: + case GGML_OP_RMS_NORM_BACK: + { + node->n_tasks = n_threads; + } break; + case GGML_OP_MUL_MAT: + case GGML_OP_OUT_PROD: + { + node->n_tasks = n_threads; + + // TODO: use different scheduling for different matrix sizes + //const int nr0 = ggml_nrows(node->src0); + //const int nr1 = ggml_nrows(node->src1); + + //node->n_tasks = MIN(n_threads, MAX(1, nr0/128)); + //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks = %d\n", nr0, nr1, nr0*nr1, node->n_tasks); + + size_t cur = 0; + +#if defined(GGML_USE_CUBLAS) + if (ggml_cuda_can_mul_mat(node->src0, node->src1, node)) { + node->n_tasks = 1; // TODO: this actually is doing nothing + // the threads are still spinning + } + else +#elif defined(GGML_USE_CLBLAST) + if (ggml_cl_can_mul_mat(node->src0, node->src1, node)) { + node->n_tasks = 1; // TODO: this actually is doing nothing + // the threads are still spinning + cur = ggml_cl_mul_mat_get_wsize(node->src0, node->src1, node); + } + else +#endif + if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) { +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) + if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { + node->n_tasks = 1; // TODO: this actually is doing nothing + // the threads are still spinning + // here we need memory just for single 2D matrix from src0 + cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]); + } else { + cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1); + } +#else + cur = GGML_TYPE_SIZE[GGML_TYPE_F16]*ggml_nelements(node->src1); +#endif + } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) { + cur = 0; +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) + if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { + node->n_tasks = 1; + } +#endif + } else if (ggml_is_quantized(node->src0->type) && node->src1->type == GGML_TYPE_F32) { +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) + if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { + node->n_tasks = 1; + cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]); + } else +#endif + { + const enum ggml_type type_q = quantize_fns[node->src0->type].vec_dot_type; + cur = GGML_TYPE_SIZE[type_q]*ggml_nelements(node->src1)/GGML_BLCK_SIZE[type_q]; + } + } else { + GGML_ASSERT(false); + } + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_SCALE: + { + node->n_tasks = n_threads; + } break; + case GGML_OP_SET: + case GGML_OP_CONT: + case GGML_OP_RESHAPE: + case GGML_OP_VIEW: + case GGML_OP_PERMUTE: + case GGML_OP_TRANSPOSE: + case GGML_OP_GET_ROWS: + case GGML_OP_GET_ROWS_BACK: + case GGML_OP_DIAG: + case GGML_OP_DIAG_MASK_ZERO: + { + node->n_tasks = 1; + } break; + case GGML_OP_DIAG_MASK_INF: + case GGML_OP_SOFT_MAX: + case GGML_OP_SOFT_MAX_BACK: + case GGML_OP_ROPE: + case GGML_OP_ROPE_BACK: + { + node->n_tasks = n_threads; + } break; + case GGML_OP_ALIBI: + { + node->n_tasks = 1; //TODO + } break; + case GGML_OP_CLAMP: + { + node->n_tasks = 1; //TODO + } break; + case GGML_OP_CONV_1D_1S: + case GGML_OP_CONV_1D_2S: + { + node->n_tasks = n_threads; + + GGML_ASSERT(node->src0->ne[3] == 1); + GGML_ASSERT(node->src1->ne[2] == 1); + GGML_ASSERT(node->src1->ne[3] == 1); + + size_t cur = 0; + const int nk = node->src0->ne[0]; + + if (node->src0->type == GGML_TYPE_F16 && + node->src1->type == GGML_TYPE_F32) { + cur = sizeof(ggml_fp16_t)*( + nk*ggml_up32(node->src0->ne[1])*node->src0->ne[2] + + ( 2*(nk/2) + node->src1->ne[0])*node->src1->ne[1] + ); + } else if (node->src0->type == GGML_TYPE_F32 && + node->src1->type == GGML_TYPE_F32) { + cur = sizeof(float)*( + nk*ggml_up32(node->src0->ne[1])*node->src0->ne[2] + + ( 2*(nk/2) + node->src1->ne[0])*node->src1->ne[1] + ); + } else { + GGML_ASSERT(false); + } + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_FLASH_ATTN: + { + node->n_tasks = n_threads; + + size_t cur = 0; + + const int64_t ne11 = ggml_up(node->src1->ne[1], GGML_SOFT_MAX_UNROLL); + + if (node->src1->type == GGML_TYPE_F32) { + cur = sizeof(float)*ne11*node->n_tasks; // TODO: this can become (n_tasks-1) + cur += sizeof(float)*ne11*node->n_tasks; // this is overestimated by x2 + } + + if (node->src1->type == GGML_TYPE_F16) { + cur = sizeof(float)*ne11*node->n_tasks; // TODO: this can become (n_tasks-1) + cur += sizeof(float)*ne11*node->n_tasks; // this is overestimated by x2 + } + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_FLASH_FF: + { + node->n_tasks = n_threads; + + size_t cur = 0; + + if (node->src1->type == GGML_TYPE_F32) { + cur = sizeof(float)*node->src1->ne[1]*node->n_tasks; // TODO: this can become (n_tasks-1) + cur += sizeof(float)*node->src1->ne[1]*node->n_tasks; // this is overestimated by x2 + } + + if (node->src1->type == GGML_TYPE_F16) { + cur = sizeof(float)*node->src1->ne[1]*node->n_tasks; // TODO: this can become (n_tasks-1) + cur += sizeof(float)*node->src1->ne[1]*node->n_tasks; // this is overestimated by x2 + } + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_FLASH_ATTN_BACK: + { + node->n_tasks = n_threads; + + size_t cur = 0; + + const int64_t D = node->src0->ne[0]; + const int64_t ne11 = ggml_up(node->src1->ne[1], GGML_SOFT_MAX_UNROLL); + const int64_t mxDn = MAX(D, ne11) * 2; // *2 because of S and SM in ggml_compute_forward_flash_attn_back + if (node->src1->type == GGML_TYPE_F32) { + cur = sizeof(float)*mxDn*node->n_tasks; // TODO: this can become (n_tasks-1) + cur += sizeof(float)*mxDn*node->n_tasks; // this is overestimated by x2 + } + + if (node->src1->type == GGML_TYPE_F16) { + cur = sizeof(float)*mxDn*node->n_tasks; // TODO: this can become (n_tasks-1) + cur += sizeof(float)*mxDn*node->n_tasks; // this is overestimated by x2 + } + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_MAP_UNARY: + case GGML_OP_MAP_BINARY: + { + node->n_tasks = 1; + } break; + case GGML_OP_CROSS_ENTROPY_LOSS: + { + node->n_tasks = n_threads; + + size_t cur = ggml_type_size(node->type)*(node->n_tasks + node->src0->ne[0]*node->n_tasks); + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_CROSS_ENTROPY_LOSS_BACK: + { + node->n_tasks = n_threads; + + size_t cur = ggml_type_size(node->type)*node->src0->ne[0]*node->n_tasks; + + work_size = MAX(work_size, cur); + } break; + case GGML_OP_NONE: + { + node->n_tasks = 1; + } break; + case GGML_OP_COUNT: + { + GGML_ASSERT(false); + } break; + } + } + + if (cgraph->work != NULL && work_size > cgraph->work_size) { + GGML_ASSERT(false); // TODO: better handling + } + + if (work_size > 0 && cgraph->work == NULL) { + cgraph->work_size = work_size + CACHE_LINE_SIZE*(n_threads - 1); + + GGML_PRINT_DEBUG("%s: allocating work buffer for graph (%zu bytes)\n", __func__, cgraph->work_size); + cgraph->work = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cgraph->work_size); + } + } + + const int64_t perf_start_cycles = ggml_perf_cycles(); + const int64_t perf_start_time_us = ggml_perf_time_us(); + + for (int i = 0; i < cgraph->n_nodes; i++) { + GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, i, cgraph->n_nodes); + + struct ggml_tensor * node = cgraph->nodes[i]; + + // TODO: this could be used to avoid unnecessary computations, but it needs to be improved + //if (node->grad == NULL && node->perf_runs > 0) { + // continue; + //} + + const int64_t perf_node_start_cycles = ggml_perf_cycles(); + const int64_t perf_node_start_time_us = ggml_perf_time_us(); + + // INIT + struct ggml_compute_params params = { + /*.type =*/ GGML_TASK_INIT, + /*.ith =*/ 0, + /*.nth =*/ node->n_tasks, + /*.wsize =*/ cgraph->work ? ggml_nbytes(cgraph->work) : 0, + /*.wdata =*/ cgraph->work ? cgraph->work->data : NULL, + }; + + ggml_compute_forward(¶ms, node); + + // COMPUTE + if (node->n_tasks > 1) { + if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) { + atomic_store(&state_shared.has_work, false); + } + + while (atomic_load(&state_shared.has_work)) { + ggml_lock_lock (&state_shared.spin); + ggml_lock_unlock(&state_shared.spin); + } + + // launch thread pool + for (int j = 0; j < n_threads - 1; j++) { + workers[j].params = (struct ggml_compute_params) { + .type = GGML_TASK_COMPUTE, + .ith = j + 1, + .nth = node->n_tasks, + .wsize = cgraph->work ? ggml_nbytes(cgraph->work) : 0, + .wdata = cgraph->work ? cgraph->work->data : NULL, + }; + workers[j].node = node; + } + + atomic_fetch_sub(&state_shared.n_ready, 1); + + while (atomic_load(&state_shared.n_ready) > 0) { + ggml_lock_lock (&state_shared.spin); + ggml_lock_unlock(&state_shared.spin); + } + + atomic_store(&state_shared.has_work, true); + } + + params.type = GGML_TASK_COMPUTE; + ggml_compute_forward(¶ms, node); + + // wait for thread pool + if (node->n_tasks > 1) { + if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) { + atomic_store(&state_shared.has_work, false); + } + + while (atomic_load(&state_shared.has_work)) { + ggml_lock_lock (&state_shared.spin); + ggml_lock_unlock(&state_shared.spin); + } + + atomic_fetch_sub(&state_shared.n_ready, 1); + + while (atomic_load(&state_shared.n_ready) != 0) { + ggml_lock_lock (&state_shared.spin); + ggml_lock_unlock(&state_shared.spin); + } + } + + // FINALIZE + if (node->n_tasks > 1) { + if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) { + atomic_store(&state_shared.has_work, false); + } + + while (atomic_load(&state_shared.has_work)) { + ggml_lock_lock (&state_shared.spin); + ggml_lock_unlock(&state_shared.spin); + } + + // launch thread pool + for (int j = 0; j < n_threads - 1; j++) { + workers[j].params = (struct ggml_compute_params) { + .type = GGML_TASK_FINALIZE, + .ith = j + 1, + .nth = node->n_tasks, + .wsize = cgraph->work ? ggml_nbytes(cgraph->work) : 0, + .wdata = cgraph->work ? cgraph->work->data : NULL, + }; + workers[j].node = node; + } + + atomic_fetch_sub(&state_shared.n_ready, 1); + + while (atomic_load(&state_shared.n_ready) > 0) { + ggml_lock_lock (&state_shared.spin); + ggml_lock_unlock(&state_shared.spin); + } + + atomic_store(&state_shared.has_work, true); + } + + params.type = GGML_TASK_FINALIZE; + ggml_compute_forward(¶ms, node); + + // wait for thread pool + if (node->n_tasks > 1) { + if (atomic_fetch_add(&state_shared.n_ready, 1) == n_threads - 1) { + atomic_store(&state_shared.has_work, false); + } + + while (atomic_load(&state_shared.has_work)) { + ggml_lock_lock (&state_shared.spin); + ggml_lock_unlock(&state_shared.spin); + } + + atomic_fetch_sub(&state_shared.n_ready, 1); + + while (atomic_load(&state_shared.n_ready) != 0) { + ggml_lock_lock (&state_shared.spin); + ggml_lock_unlock(&state_shared.spin); + } + } + + // performance stats (node) + { + int64_t perf_cycles_cur = ggml_perf_cycles() - perf_node_start_cycles; + int64_t perf_time_us_cur = ggml_perf_time_us() - perf_node_start_time_us; + + node->perf_runs++; + node->perf_cycles += perf_cycles_cur; + node->perf_time_us += perf_time_us_cur; + } + } + + // join thread pool + if (n_threads > 1) { + atomic_store(&state_shared.stop, true); + atomic_store(&state_shared.has_work, true); + + for (int j = 0; j < n_threads - 1; j++) { + int rc = ggml_thread_join(workers[j].thrd, NULL); + GGML_ASSERT(rc == 0); + UNUSED(rc); + } + + ggml_lock_destroy(&state_shared.spin); + } + + // performance stats (graph) + { + int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles; + int64_t perf_time_us_cur = ggml_perf_time_us() - perf_start_time_us; + + cgraph->perf_runs++; + cgraph->perf_cycles += perf_cycles_cur; + cgraph->perf_time_us += perf_time_us_cur; + + GGML_PRINT_DEBUG("%s: perf (%d) - cpu = %.3f / %.3f ms, wall = %.3f / %.3f ms\n", + __func__, cgraph->perf_runs, + (double) perf_cycles_cur / (double) ggml_cycles_per_ms(), + (double) cgraph->perf_cycles / (double) ggml_cycles_per_ms() / (double) cgraph->perf_runs, + (double) perf_time_us_cur / 1000.0, + (double) cgraph->perf_time_us / 1000.0 / cgraph->perf_runs); + } +} + +void ggml_graph_reset(struct ggml_cgraph * cgraph) { + for (int i = 0; i < cgraph->n_nodes; i++) { + struct ggml_tensor * grad = cgraph->grads[i]; + + if (grad) { + ggml_set_zero(grad); + } + } +} + +struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) { + for (int i = 0; i < cgraph->n_leafs; i++) { + struct ggml_tensor * leaf = cgraph->leafs[i]; + + if (strcmp(leaf->name, name) == 0) { + return leaf; + } + } + + for (int i = 0; i < cgraph->n_nodes; i++) { + struct ggml_tensor * node = cgraph->nodes[i]; + + if (strcmp(node->name, name) == 0) { + return node; + } + } + + return NULL; +} + +static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fout) { + const int64_t * ne = tensor->ne; + const size_t * nb = tensor->nb; + + fprintf(fout, "%-6s %-12s %8d %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %16zu %16zu %16zu %16zu %16p %32s\n", + ggml_type_name(tensor->type), + ggml_op_name (tensor->op), + tensor->n_dims, + ne[0], ne[1], ne[2], ne[3], + nb[0], nb[1], nb[2], nb[3], + tensor->data, + tensor->name); +} + +static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char * arg, FILE * fout) { + const int64_t * ne = tensor->ne; + const size_t * nb = tensor->nb; + + fprintf(fout, "%-6s %-6s %-12s %8d %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %16zu %16zu %16zu %16zu %8d %16p %32s\n", + arg, + ggml_type_name(tensor->type), + ggml_op_name (tensor->op), + tensor->n_dims, + ne[0], ne[1], ne[2], ne[3], + nb[0], nb[1], nb[2], nb[3], + tensor->n_tasks, + tensor->data, + tensor->name); +} + +void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) { + //assert(cgraph->work == NULL); + //assert(cgraph->work_size == 0); + + uint64_t size_eval = 0; + + // compute size of intermediate results + // TODO: does not take into account scratch buffers !!!! + for (int i = 0; i < cgraph->n_nodes; ++i) { + size_eval += ggml_nbytes(cgraph->nodes[i]); + } + + // print + { + FILE * fout = stdout; + + fprintf(fout, "\n"); + fprintf(fout, "%-16s %8x\n", "magic", GGML_FILE_MAGIC); + fprintf(fout, "%-16s %8d\n", "version", GGML_FILE_VERSION); + fprintf(fout, "%-16s %8d\n", "leafs", cgraph->n_leafs); + fprintf(fout, "%-16s %8d\n", "nodes", cgraph->n_nodes); + fprintf(fout, "%-16s %" PRIu64 "\n", "eval", size_eval); + + // header + fprintf(fout, "\n"); + fprintf(fout, "%-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %16s %16s\n", + "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "DATA", "NAME"); + + for (int i = 0; i < cgraph->n_leafs; ++i) { + ggml_graph_export_leaf(cgraph->leafs[i], fout); + + GGML_ASSERT(cgraph->leafs[i]->op == GGML_OP_NONE); + GGML_ASSERT(cgraph->leafs[i]->src0 == NULL); + GGML_ASSERT(cgraph->leafs[i]->src1 == NULL); + } + + // header + fprintf(fout, "\n"); + fprintf(fout, "%-6s %-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %8s %16s %16s\n", + "ARG", "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "NTASKS", "DATA", "NAME"); + + for (int i = 0; i < cgraph->n_nodes; ++i) { + ggml_graph_export_node(cgraph->nodes[i], "DST", fout); + + if (cgraph->nodes[i]->src0) { + ggml_graph_export_node(cgraph->nodes[i]->src0, "SRC0", fout); + } + + if (cgraph->nodes[i]->src1) { + ggml_graph_export_node(cgraph->nodes[i]->src1, "SRC1", fout); + } + + for (int j = 0; j < GGML_MAX_OPT; ++j) { + if (cgraph->nodes[i]->opt[j]) { + ggml_graph_export_node(cgraph->nodes[i]->opt[j], "OPT", fout); + } + } + + fprintf(fout, "\n"); + } + + fprintf(fout, "\n"); + } + + // write binary data + { + FILE * fout = fopen(fname, "wb"); + + if (!fout) { + fprintf(stderr, "%s: failed to open %s\n", __func__, fname); + return; + } + + // header + { + const uint32_t magic = GGML_FILE_MAGIC; + const uint32_t version = GGML_FILE_VERSION; + const uint32_t n_leafs = cgraph->n_leafs; + const uint32_t nodes = cgraph->n_nodes; + + fwrite(&magic, sizeof(uint32_t), 1, fout); + fwrite(&version, sizeof(uint32_t), 1, fout); + fwrite(&n_leafs, sizeof(uint32_t), 1, fout); + fwrite(&nodes, sizeof(uint32_t), 1, fout); + fwrite(&size_eval, sizeof(uint64_t), 1, fout); + } + + // leafs + { + for (int i = 0; i < cgraph->n_leafs; ++i) { + const struct ggml_tensor * tensor = cgraph->leafs[i]; + + const uint32_t type = tensor->type; + const uint32_t op = tensor->op; + const uint32_t n_dims = tensor->n_dims; + + fwrite(&type, sizeof(uint32_t), 1, fout); + fwrite(&op, sizeof(uint32_t), 1, fout); + fwrite(&n_dims, sizeof(uint32_t), 1, fout); + + for (int j = 0; j < GGML_MAX_DIMS; ++j) { + const uint64_t ne = tensor->ne[j]; + const uint64_t nb = tensor->nb[j]; + + fwrite(&ne, sizeof(uint64_t), 1, fout); + fwrite(&nb, sizeof(uint64_t), 1, fout); + } + + // store the pointer address + { + const uint64_t ptr = (uint64_t) tensor->data; + + fwrite(&ptr, sizeof(uint64_t), 1, fout); + } + + fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + + // dump the data + // TODO: pad this to 32 byte boundary + { + const size_t size = ggml_nbytes(tensor); + + fwrite(tensor->data, sizeof(char), size, fout); + } + } + } + + // nodes + { + for (int i = 0; i < cgraph->n_nodes; ++i) { + const struct ggml_tensor * tensor = cgraph->nodes[i]; + + const uint32_t type = tensor->type; + const uint32_t op = tensor->op; + const uint32_t n_dims = tensor->n_dims; + + fwrite(&type, sizeof(uint32_t), 1, fout); + fwrite(&op, sizeof(uint32_t), 1, fout); + fwrite(&n_dims, sizeof(uint32_t), 1, fout); + + for (int j = 0; j < GGML_MAX_DIMS; ++j) { + const uint64_t ne = tensor->ne[j]; + const uint64_t nb = tensor->nb[j]; + + fwrite(&ne, sizeof(uint64_t), 1, fout); + fwrite(&nb, sizeof(uint64_t), 1, fout); + } + + // store the pointer address + { + const uint64_t ptr = (uint64_t) tensor->data; + + fwrite(&ptr, sizeof(uint64_t), 1, fout); + } + + fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + + // output the op arguments + { + struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL }; + + args[0] = tensor->src0; + args[1] = tensor->src1; + + for (int j = 0; j < GGML_MAX_OPT; ++j) { + args[2 + j] = tensor->opt[j]; + } + + for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) { + if (args[j]) { + int32_t idx = -1; + + // check if leaf + { + for (int k = 0; k < cgraph->n_leafs; ++k) { + if (args[j] == cgraph->leafs[k]) { + idx = k; + break; + } + } + } + + // check if node + if (idx == -1) { + for (int k = 0; k < cgraph->n_nodes; ++k) { + if (args[j] == cgraph->nodes[k]) { + idx = GGML_MAX_NODES + k; + break; + } + } + } + + if (idx == -1) { + fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i); + return; + } + + fwrite(&idx, sizeof(int32_t), 1, fout); + } else { + const int32_t nul = -1; + + fwrite(&nul, sizeof(int32_t), 1, fout); + } + } + } + } + } + + fclose(fout); + } +} + +struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) { + assert(*ctx_data == NULL); + assert(*ctx_eval == NULL); + + struct ggml_cgraph result = { 0 }; + + struct ggml_tensor * data = NULL; + + // read file into data + { + FILE * fin = fopen(fname, "rb"); + if (!fin) { + fprintf(stderr, "%s: failed to open %s\n", __func__, fname); + return result; + } + + size_t fsize = 0; + + fseek(fin, 0, SEEK_END); + fsize = ftell(fin); + fseek(fin, 0, SEEK_SET); + + // create the data context + { + const size_t overhead = 1*ggml_tensor_overhead(); + + struct ggml_init_params params = { + .mem_size = fsize + overhead, + .mem_buffer = NULL, + .no_alloc = false, + }; + + *ctx_data = ggml_init(params); + + if (!*ctx_data) { + fprintf(stderr, "%s: failed to create ggml context\n", __func__); + return result; + } + } + + data = ggml_new_tensor_1d(*ctx_data, GGML_TYPE_I8, fsize); + + const size_t ret = fread(data->data, sizeof(char), fsize, fin); + if (ret != fsize) { + fprintf(stderr, "%s: failed to read %s\n", __func__, fname); + return result; + } + + fclose(fin); + } + + // populate result + { + char * ptr = (char *) data->data; + + const uint32_t magic = *(const uint32_t *) ptr; ptr += sizeof(magic); + + if (magic != GGML_FILE_MAGIC) { + fprintf(stderr, "%s: invalid magic number, got %08x\n", __func__, magic); + return result; + } + + const uint32_t version = *(const uint32_t *) ptr; ptr += sizeof(version); + + if (version != GGML_FILE_VERSION) { + fprintf(stderr, "%s: invalid version number\n", __func__); + return result; + } + + const uint32_t n_leafs = *(const uint32_t *) ptr; ptr += sizeof(n_leafs); + const uint32_t n_nodes = *(const uint32_t *) ptr; ptr += sizeof(n_nodes); + const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval); + + result.n_leafs = n_leafs; + result.n_nodes = n_nodes; + + // create the data context + { + const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead(); + + struct ggml_init_params params = { + .mem_size = size_eval + overhead, + .mem_buffer = NULL, + .no_alloc = true, + }; + + *ctx_eval = ggml_init(params); + + if (!*ctx_eval) { + fprintf(stderr, "%s: failed to create ggml context\n", __func__); + return result; + } + } + + // leafs + { + uint32_t type; + uint32_t op; + uint32_t n_dims; + + for (uint32_t i = 0; i < n_leafs; ++i) { + type = *(const uint32_t *) ptr; ptr += sizeof(type); + op = *(const uint32_t *) ptr; ptr += sizeof(op); + n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims); + + int64_t ne[GGML_MAX_DIMS]; + size_t nb[GGML_MAX_DIMS]; + + for (int j = 0; j < GGML_MAX_DIMS; ++j) { + uint64_t ne_cur; + uint64_t nb_cur; + + ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur); + nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur); + + ne[j] = ne_cur; + nb[j] = nb_cur; + } + + struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne); + + tensor->op = (enum ggml_op) op; + + uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur); + + memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME; + + tensor->data = (void *) ptr; + + for (int j = 0; j < GGML_MAX_DIMS; ++j) { + tensor->nb[j] = nb[j]; + } + + result.leafs[i] = tensor; + + ptr += ggml_nbytes(tensor); + + fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor)); + } + } + + ggml_set_no_alloc(*ctx_eval, false); + + // nodes + { + uint32_t type; + uint32_t op; + uint32_t n_dims; + + for (uint32_t i = 0; i < n_nodes; ++i) { + type = *(const uint32_t *) ptr; ptr += sizeof(type); + op = *(const uint32_t *) ptr; ptr += sizeof(op); + n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims); + + enum ggml_op eop = (enum ggml_op) op; + + int64_t ne[GGML_MAX_DIMS]; + size_t nb[GGML_MAX_DIMS]; + + for (int j = 0; j < GGML_MAX_DIMS; ++j) { + uint64_t ne_cur; + uint64_t nb_cur; + + ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur); + nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur); + + ne[j] = ne_cur; + nb[j] = nb_cur; + } + + uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur); // TODO: not yet used + + const char * ptr_name = ptr; ptr += GGML_MAX_NAME; + + const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += (2 + GGML_MAX_OPT)*sizeof(int32_t); + + struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL }; + + // parse args + for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) { + const int32_t arg_idx = ptr_arg_idx[j]; + + if (arg_idx == -1) { + continue; + } + + if (arg_idx < GGML_MAX_NODES) { + args[j] = result.leafs[arg_idx]; + } else { + args[j] = result.nodes[arg_idx - GGML_MAX_NODES]; + } + } + + // create the tensor + // "view" operations are handled differently + // TODO: handle inplace ops - currently a copy is always made + + struct ggml_tensor * tensor = NULL; + + switch (eop) { + // TODO: implement other view ops + case GGML_OP_RESHAPE: + { + tensor = ggml_reshape_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3]); + } break; + case GGML_OP_VIEW: + { + tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0); + + uint64_t offs; + memcpy(&offs, args[2]->data, sizeof(offs)); + + tensor->data = ((char *) tensor->data) + offs; + } break; + case GGML_OP_TRANSPOSE: + { + tensor = ggml_transpose(*ctx_eval, args[0]); + } break; + case GGML_OP_PERMUTE: + { + tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0); + } break; + default: + { + tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne); + + tensor->op = eop; + } break; + } + + memcpy(tensor->name, ptr_name, GGML_MAX_NAME); + + for (int j = 0; j < GGML_MAX_DIMS; ++j) { + tensor->nb[j] = nb[j]; + } + + tensor->src0 = args[0]; + tensor->src1 = args[1]; + + for (int j = 0; j < GGML_MAX_OPT; ++j) { + tensor->opt[j] = args[2 + j]; + } + + result.nodes[i] = tensor; + + fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor)); + } + } + } + + return result; +} + +void ggml_graph_print(const struct ggml_cgraph * cgraph) { + int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0}; + + GGML_PRINT("=== GRAPH ===\n"); + + GGML_PRINT_DEBUG("n_threads = %d\n", cgraph->n_threads); + GGML_PRINT_DEBUG("total work size = %zu bytes\n", cgraph->work_size); + + GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes); + for (int i = 0; i < cgraph->n_nodes; i++) { + struct ggml_tensor * node = cgraph->nodes[i]; + + perf_total_per_op_us[node->op] += MAX(1, node->perf_time_us); + + GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n", + i, + node->ne[0], node->ne[1], node->ne[2], + GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs, + (double) node->perf_cycles / (double) ggml_cycles_per_ms(), + (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs, + (double) node->perf_time_us / 1000.0, + (double) node->perf_time_us / 1000.0 / node->perf_runs); + } + + GGML_PRINT("n_leafs = %d\n", cgraph->n_leafs); + for (int i = 0; i < cgraph->n_leafs; i++) { + struct ggml_tensor * node = cgraph->leafs[i]; + + GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n", + i, + node->ne[0], node->ne[1], + GGML_OP_NAME[node->op]); + } + + for (int i = 0; i < GGML_OP_COUNT; i++) { + if (perf_total_per_op_us[i] == 0) { + continue; + } + + GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0); + } + + GGML_PRINT("========================================\n"); +} + +// check if node is part of the graph +static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { + if (cgraph == NULL) { + return true; + } + + for (int i = 0; i < cgraph->n_nodes; i++) { + if (cgraph->nodes[i] == node) { + return true; + } + } + + return false; +} + +static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { + for (int i = 0; i < cgraph->n_nodes; i++) { + struct ggml_tensor * parent = cgraph->nodes[i]; + + if (parent->grad == node) { + return parent; + } + } + + return NULL; +} + +void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) { + char color[16]; + + FILE * fp = fopen(filename, "w"); + GGML_ASSERT(fp); + + fprintf(fp, "digraph G {\n"); + fprintf(fp, " newrank = true;\n"); + fprintf(fp, " rankdir = LR;\n"); + + for (int i = 0; i < gb->n_nodes; i++) { + struct ggml_tensor * node = gb->nodes[i]; + + if (ggml_graph_get_parent(gb, node) != NULL) { + continue; + } + + if (node->is_param) { + snprintf(color, sizeof(color), "yellow"); + } else if (node->grad) { + if (ggml_graph_find(gf, node)) { + snprintf(color, sizeof(color), "green"); + } else { + snprintf(color, sizeof(color), "lightblue"); + } + } else { + snprintf(color, sizeof(color), "white"); + } + + fprintf(fp, " \"%p\" [ " + "style = filled; fillcolor = %s; shape = record; " + "label=\"", + (void *) node, color); + + if (strlen(node->name) > 0) { + fprintf(fp, "%s |", node->name); + } + + if (node->n_dims == 2) { + fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], GGML_OP_SYMBOL[node->op]); + } else { + fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], node->ne[2], GGML_OP_SYMBOL[node->op]); + } + + + if (node->grad) { + fprintf(fp, " | %s\"; ]\n", GGML_OP_SYMBOL[node->grad->op]); + } else { + fprintf(fp, "\"; ]\n"); + } + } + + for (int i = 0; i < gb->n_leafs; i++) { + struct ggml_tensor * node = gb->leafs[i]; + + snprintf(color, sizeof(color), "pink"); + + fprintf(fp, " \"%p\" [ " + "style = filled; fillcolor = %s; shape = record; " + "label=\"", + (void *) node, color); + + if (strlen(node->name) > 0) { + fprintf(fp, "%s | ", node->name); + } + if (ggml_nelements(node) == 1) { + if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) { + fprintf(fp, "%d", ggml_get_i32_1d(node, 0)); + } + else { + fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, 0)); + } + } + else { + fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]); + } + fprintf(fp, "\"; ]\n"); + } + + for (int i = 0; i < gb->n_nodes; i++) { + struct ggml_tensor * node = gb->nodes[i]; + + struct ggml_tensor * parent = ggml_graph_get_parent(gb, node); + + if (node->src0) { + struct ggml_tensor * parent0 = ggml_graph_get_parent(gb, node->src0); + + fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"x\"; ]\n", + parent0 ? (void *) parent0 : (void *) node->src0, + parent0 ? "g" : "x", + parent ? (void *) parent : (void *) node, + parent ? "g" : "x", + parent ? "empty" : "vee", + parent ? "dashed" : "solid"); + } + + if (node->src1) { + struct ggml_tensor * parent1 = ggml_graph_get_parent(gb, node->src1); + + fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"y\"; ]\n", + parent1 ? (void *) parent1 : (void *) node->src1, + parent1 ? "g" : "x", + parent ? (void *) parent : (void *) node, + parent ? "g" : "x", + parent ? "empty" : "vee", + parent ? "dashed" : "solid"); + } + } + + for (int i = 0; i < gb->n_leafs; i++) { + struct ggml_tensor * node = gb->leafs[i]; + + if (node->src0) { + fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"x\"; ]\n", + (void *) node->src0, "x", + (void *) node, "x"); + } + + if (node->src1) { + fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"y\"; ]\n", + (void *) node->src1, "x", + (void *) node, "x"); + } + } + + fprintf(fp, "}\n"); + + fclose(fp); + + GGML_PRINT("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename); +} + +//////////////////////////////////////////////////////////////////////////////// + +static void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const float * x) { + int i = 0; + for (int p = 0; p < np; ++p) { + const int64_t ne = ggml_nelements(ps[p]) ; + // TODO: add function to set tensor from array + for (int64_t j = 0; j < ne; ++j) { + ggml_set_f32_1d(ps[p], j, x[i++]); + } + } +} + +static void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float * x) { + int i = 0; + for (int p = 0; p < np; ++p) { + const int64_t ne = ggml_nelements(ps[p]) ; + // TODO: add function to get all elements at once + for (int64_t j = 0; j < ne; ++j) { + x[i++] = ggml_get_f32_1d(ps[p], j); + } + } +} + +static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g) { + int i = 0; + for (int p = 0; p < np; ++p) { + const int64_t ne = ggml_nelements(ps[p]) ; + // TODO: add function to get all elements at once + for (int64_t j = 0; j < ne; ++j) { + g[i++] = ggml_get_f32_1d(ps[p]->grad, j); + } + } +} + +// +// ADAM +// +// ref: https://arxiv.org/pdf/1412.6980.pdf +// + +static enum ggml_opt_result ggml_opt_adam( + struct ggml_context * ctx, + struct ggml_opt_context * opt, + struct ggml_opt_params params, + struct ggml_tensor * f, + struct ggml_cgraph * gf, + struct ggml_cgraph * gb) { + GGML_ASSERT(ggml_is_scalar(f)); + + gf->n_threads = params.n_threads; + gb->n_threads = params.n_threads; + + // these will store the parameters we want to optimize + struct ggml_tensor * ps[GGML_MAX_PARAMS]; + + int np = 0; + int nx = 0; + for (int i = 0; i < gf->n_nodes; ++i) { + if (gf->nodes[i]->is_param) { + GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op); + + GGML_ASSERT(np < GGML_MAX_PARAMS); + + ps[np++] = gf->nodes[i]; + nx += ggml_nelements(gf->nodes[i]); + } + } + + if ((opt->params.type != params.type) || (opt->nx != nx) || (opt->params.past != params.past)) { + int iter = opt->iter; + ggml_opt_init(opt->ctx, opt, params, nx); + opt->iter = iter; + } + + // constants + const float sched = params.adam.sched; + const float decay = params.adam.decay * sched; + const float alpha = params.adam.alpha * sched; + const float beta1 = params.adam.beta1; + const float beta2 = params.adam.beta2; + const float eps = params.adam.eps; + + float * x = opt->adam.x->data; // view of the parameters + float * g1 = opt->adam.g1->data; // gradient + float * g2 = opt->adam.g2->data; // gradient squared + float * m = opt->adam.m->data; // first moment + float * v = opt->adam.v->data; // second moment + float * mh = opt->adam.mh->data; // first moment hat + float * vh = opt->adam.vh->data; // second moment hat + + float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values + + // update view + ggml_opt_get_params(np, ps, x); + + // compute the function value + ggml_graph_reset (gf); + ggml_set_f32 (f->grad, 1.0f); + ggml_graph_compute(ctx, gb); + + opt->adam.fx_prev = ggml_get_f32_1d(f, 0); + opt->adam.fx_best = opt->adam.fx_prev; + if (pf) { + pf[opt->iter % params.past] = opt->adam.fx_prev; + } + + // initialize + if (opt->just_initialized) { + opt->adam.n_no_improvement = 0; + opt->just_initialized = false; + } + + float * fx_best = &opt->adam.fx_best; + float * fx_prev = &opt->adam.fx_prev; + int * n_no_improvement = &opt->adam.n_no_improvement; + + int iter0 = opt->iter; + + // run the optimizer + for (int t = 0; t < params.adam.n_iter; ++t) { + opt->iter = iter0 + t + 1; + GGML_PRINT_DEBUG ("=== iter %d ===\n", t); + + GGML_PRINT_DEBUG ("f = %10.6f\n", ggml_get_f32_1d(f, 0)); + GGML_PRINT_DEBUG_5("df/dx0 = %10.6f\n", ggml_get_f32_1d(ps[0]->grad, 0)); + GGML_PRINT_DEBUG_5("df/dx1 = %10.6f\n", ggml_get_f32_1d(ps[1]->grad, 0)); + + for (int i = 0; i < np; ++i) { + GGML_PRINT_DEBUG("param %d: %10.6f, g = %10.6f\n", i, + ggml_get_f32_1d(ps[i], 0), ggml_get_f32_1d(ps[i]->grad, 0)); + } + + const int64_t t_start_wall = ggml_time_us(); + const int64_t t_start_cpu = ggml_cycles(); + UNUSED(t_start_wall); + UNUSED(t_start_cpu); + + { + // update the gradient + ggml_opt_get_grad(np, ps, g1); + + // m_t = beta1*m_t-1 + (1 - beta1)*g_t + ggml_vec_scale_f32(nx, m, beta1); + ggml_vec_mad_f32 (nx, m, g1, 1.0f - beta1); + + // g2 = g1^2 + ggml_vec_sqr_f32 (nx, g2, g1); + + // v_t = beta2*v_t-1 + (1 - beta2)*g_t^2 + ggml_vec_scale_f32(nx, v, beta2); + ggml_vec_mad_f32 (nx, v, g2, 1.0f - beta2); + + // m^hat = m_t / (1 - beta1^t) + // v^hat = v_t / (1 - beta2^t) + // x_t = x_t-1 - sched*(alpha*m^hat/(sqrt(v^hat) + eps) + decay*x_t-1) + // x_t = x_t-1 - sched*alpha*m^hat/(sqrt(v^hat) + eps) - sched*decay*x_t-1 + // x_t = x_t-1*(1-sched*decay) - sched*alpha*m^hat/(sqrt(v^hat) + eps) + // x_t = x_t-1*(1-sched*decay) + sched*decay*(-alpha/decay)*m^hat/(sqrt(v^hat) + eps) + // x_t = mix(x_t-1, (-alpha/decay)*m^hat/(sqrt(v^hat) + eps), sched*decay) + ggml_vec_cpy_f32 (nx, mh, m); + ggml_vec_cpy_f32 (nx, vh, v); + + ggml_vec_scale_f32(nx, mh, alpha/(1.0f - powf(beta1, opt->iter))); + ggml_vec_scale_f32(nx, vh, 1.0f/(1.0f - powf(beta2, opt->iter))); + + ggml_vec_sqrt_f32 (nx, vh, vh); + ggml_vec_acc1_f32 (nx, vh, eps); + + ggml_vec_div_f32 (nx, mh, mh, vh); + ggml_vec_scale_f32(nx, x, 1.0f - decay); + ggml_vec_sub_f32 (nx, x, x, mh); + + // update the parameters + ggml_opt_set_params(np, ps, x); + } + + ggml_graph_reset (gf); + ggml_set_f32 (f->grad, 1.0f); + ggml_graph_compute(ctx, gb); + + const float fx = ggml_get_f32_1d(f, 0); + + // check convergence + if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) { + GGML_PRINT_DEBUG("converged\n"); + + return GGML_OPT_OK; + } + + // delta-based convergence test + if (pf != NULL) { + // need at least params.past iterations to start checking for convergence + if (params.past <= iter0 + t) { + const float rate = (pf[(iter0 + t)%params.past] - fx)/fx; + + if (fabsf(rate) < params.delta) { + return GGML_OPT_OK; + } + } + + pf[(iter0 + t)%params.past] = fx; + } + + // check for improvement + if (params.max_no_improvement > 0) { + if (fx_best[0] > fx) { + fx_best[0] = fx; + n_no_improvement[0] = 0; + } else { + ++n_no_improvement[0]; + + if (n_no_improvement[0] >= params.max_no_improvement) { + return GGML_OPT_OK; + } + } + } + + fx_prev[0] = fx; + + { + const int64_t t_end_cpu = ggml_cycles(); + GGML_PRINT_DEBUG("time iter: %5.3f s\n", ((float)(t_end_cpu - t_start_cpu))/CLOCKS_PER_SEC); + UNUSED(t_end_cpu); + + const int64_t t_end_wall = ggml_time_us(); + GGML_PRINT_DEBUG("wall time iter: %5.3f s\n", (t_end_wall - t_start_wall)/1e6); + UNUSED(t_end_wall); + } + } + + return GGML_OPT_DID_NOT_CONVERGE; +} + +// +// L-BFGS +// +// the L-BFGS implementation below is based on the following implementation: +// +// https://github.com/chokkan/liblbfgs +// + +struct ggml_lbfgs_iteration_data { + float alpha; + float ys; + float * s; + float * y; +}; + +static enum ggml_opt_result linesearch_backtracking( + struct ggml_context * ctx, + const struct ggml_opt_params * params, + int nx, + float * x, + float * fx, + float * g, + float * d, + float * step, + const float * xp, + struct ggml_tensor * f, + struct ggml_cgraph * gf, + struct ggml_cgraph * gb, + const int np, + struct ggml_tensor * ps[]) { + int count = 0; + + float width = 0.0f; + float dg = 0.0f; + float finit = 0.0f; + float dginit = 0.0f; + float dgtest = 0.0f; + + const float dec = 0.5f; + const float inc = 2.1f; + + if (*step <= 0.f) { + return GGML_LINESEARCH_INVALID_PARAMETERS; + } + + // compute the initial gradient in the search direction + ggml_vec_dot_f32(nx, &dginit, g, d); + + // make sure that d points to a descent direction + if (0 < dginit) { + return GGML_LINESEARCH_FAIL; + } + + // initialize local variables + finit = *fx; + dgtest = params->lbfgs.ftol*dginit; + + while (true) { + ggml_vec_cpy_f32(nx, x, xp); + ggml_vec_mad_f32(nx, x, d, *step); + + // evaluate the function and gradient values + { + ggml_opt_set_params(np, ps, x); + + ggml_graph_reset (gf); + ggml_set_f32 (f->grad, 1.0f); + ggml_graph_compute(ctx, gb); + + ggml_opt_get_grad(np, ps, g); + + *fx = ggml_get_f32_1d(f, 0); + } + + ++count; + + if (*fx > finit + (*step)*dgtest) { + width = dec; + } else { + // Armijo condition is satisfied + if (params->lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_ARMIJO) { + return count; + } + + ggml_vec_dot_f32(nx, &dg, g, d); + + // check the Wolfe condition + if (dg < params->lbfgs.wolfe * dginit) { + width = inc; + } else { + if(params->lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE) { + // regular Wolfe conditions + return count; + } + + if(dg > -params->lbfgs.wolfe*dginit) { + width = dec; + } else { + // strong Wolfe condition (GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) + return count; + } + return count; + } + } + + if (*step < params->lbfgs.min_step) { + return GGML_LINESEARCH_MINIMUM_STEP; + } + if (*step > params->lbfgs.max_step) { + return GGML_LINESEARCH_MAXIMUM_STEP; + } + if (params->lbfgs.max_linesearch <= count) { + return GGML_LINESEARCH_MAXIMUM_ITERATIONS; + } + + (*step) *= width; + } + + return GGML_LINESEARCH_FAIL; +} + +static enum ggml_opt_result ggml_opt_lbfgs( + struct ggml_context * ctx, + struct ggml_opt_context * opt, + struct ggml_opt_params params, + struct ggml_tensor * f, + struct ggml_cgraph * gf, + struct ggml_cgraph * gb) { + if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE || + params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { + if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) { + return GGML_OPT_INVALID_WOLFE; + } + } + + gf->n_threads = params.n_threads; + gb->n_threads = params.n_threads; + + const int m = params.lbfgs.m; + + // these will store the parameters we want to optimize + struct ggml_tensor * ps[GGML_MAX_PARAMS]; + + int np = 0; + int nx = 0; + for (int i = 0; i < gf->n_nodes; ++i) { + if (gf->nodes[i]->is_param) { + GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op); + + GGML_ASSERT(np < GGML_MAX_PARAMS); + + ps[np++] = gf->nodes[i]; + nx += ggml_nelements(gf->nodes[i]); + } + } + + if ((opt->params.type != params.type) || (opt->nx != nx) || (opt->params.past != params.past) || (opt->params.lbfgs.m != params.lbfgs.m)) { + int iter = opt->iter; + ggml_opt_init(ctx, opt, params, nx); + opt->iter = iter; + } + + float * x = opt->lbfgs.x->data; // current parameters + float * xp = opt->lbfgs.xp->data; // previous parameters + float * g = opt->lbfgs.g->data; // current gradient + float * gp = opt->lbfgs.gp->data; // previous gradient + float * d = opt->lbfgs.d->data; // search direction + + float * pf = params.past > 0 ? opt->lbfgs.pf->data : NULL; // past function values + + float fx = 0.0f; // cost function value + float xnorm = 0.0f; // ||x|| + float gnorm = 0.0f; // ||g|| + + // initialize x from the graph nodes + ggml_opt_get_params(np, ps, x); + + // the L-BFGS memory + float * lm_alpha = opt->lbfgs.lmal->data; + float * lm_ys = opt->lbfgs.lmys->data; + float * lm_s = opt->lbfgs.lms->data; + float * lm_y = opt->lbfgs.lmy->data; + + // evaluate the function value and its gradient + { + ggml_opt_set_params(np, ps, x); + + ggml_graph_reset (gf); + ggml_set_f32 (f->grad, 1.0f); + ggml_graph_compute(ctx, gb); + + ggml_opt_get_grad(np, ps, g); + + fx = ggml_get_f32_1d(f, 0); + } + + // search direction = -gradient + ggml_vec_neg_f32(nx, d, g); + + // ||x||, ||g|| + ggml_vec_norm_f32(nx, &xnorm, x); + ggml_vec_norm_f32(nx, &gnorm, g); + + if (xnorm < 1.0f) { + xnorm = 1.0f; + } + + // already optimized + if (gnorm/xnorm <= params.lbfgs.eps) { + return GGML_OPT_OK; + } + + if (opt->just_initialized) { + if (pf) { + pf[0] = fx; + } + opt->lbfgs.fx_best = fx; + + // initial step + ggml_vec_norm_inv_f32(nx, &opt->lbfgs.step, d); + opt->lbfgs.j = 0; + opt->lbfgs.k = 1; + opt->lbfgs.end = 0; + opt->lbfgs.n_no_improvement = 0; + opt->just_initialized = false; + } + + float * fx_best = &opt->lbfgs.fx_best; + float * step = &opt->lbfgs.step; + int * j = &opt->lbfgs.j; + int * k = &opt->lbfgs.k; + int * end = &opt->lbfgs.end; + int * n_no_improvement = &opt->lbfgs.n_no_improvement; + + int ls = 0; + int bound = 0; + + float ys = 0.0f; + float yy = 0.0f; + float beta = 0.0f; + + int it = 0; + + while (true) { + // store the current position and gradient vectors + ggml_vec_cpy_f32(nx, xp, x); + ggml_vec_cpy_f32(nx, gp, g); + + ls = linesearch_backtracking(ctx, ¶ms, nx, x, &fx, g, d, step, xp, f, gf, gb, np, ps); + + if (ls < 0) { + // linesearch failed - go back to the previous point and return + ggml_vec_cpy_f32(nx, x, xp); + ggml_vec_cpy_f32(nx, g, gp); + + return ls; + } + + ggml_vec_norm_f32(nx, &xnorm, x); + ggml_vec_norm_f32(nx, &gnorm, g); + + GGML_PRINT_DEBUG("f = %10.6f\n", ggml_get_f32_1d(f, 0)); + + if (xnorm < 1.0f) { + xnorm = 1.0f; + } + if (gnorm/xnorm <= params.lbfgs.eps) { + // converged + return GGML_OPT_OK; + } + + // delta-based convergence test + if (pf != NULL) { + // need at least params.past iterations to start checking for convergence + if (params.past <= k[0]) { + const float rate = (pf[k[0]%params.past] - fx)/fx; + + if (fabsf(rate) < params.delta) { + return GGML_OPT_OK; + } + } + + pf[k[0]%params.past] = fx; + } + + // check for improvement + if (params.max_no_improvement > 0) { + if (fx < fx_best[0]) { + fx_best[0] = fx; + n_no_improvement[0] = 0; + } else { + n_no_improvement[0]++; + + if (n_no_improvement[0] >= params.max_no_improvement) { + return GGML_OPT_OK; + } + } + } + + if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) { + // reached the maximum number of iterations + return GGML_OPT_DID_NOT_CONVERGE; + } + + // update vectors s and y: + // s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}. + // y_{k+1} = g_{k+1} - g_{k}. + // + ggml_vec_sub_f32(nx, &lm_s[end[0]*nx], x, xp); + ggml_vec_sub_f32(nx, &lm_y[end[0]*nx], g, gp); + + // compute scalars ys and yy: + // ys = y^t \cdot s -> 1 / \rho. + // yy = y^t \cdot y. + // + ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0] *nx]); + ggml_vec_dot_f32(nx, &yy, &lm_y[end[0]*nx], &lm_y[end[0]*nx]); + + lm_ys[end[0]] = ys; + + // find new search direction + // ref: https://en.wikipedia.org/wiki/Limited-memory_BFGS + + bound = (m <= k[0]) ? m : k[0]; + k[0]++; + it++; + end[0] = (end[0] + 1)%m; + + // initialize search direction with -g + ggml_vec_neg_f32(nx, d, g); + + j[0] = end[0]; + for (int i = 0; i < bound; ++i) { + j[0] = (j[0] + m - 1) % m; + // \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1} + ggml_vec_dot_f32(nx, &lm_alpha[j[0]], &lm_s[j[0]*nx], d); + lm_alpha[j[0]] /= lm_ys[j[0]]; + // q_{i} = q_{i+1} - \alpha_{i} y_{i} + ggml_vec_mad_f32(nx, d, &lm_y[j[0]*nx], -lm_alpha[j[0]]); + } + + ggml_vec_scale_f32(nx, d, ys/yy); + + for (int i = 0; i < bound; ++i) { + // \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i} + ggml_vec_dot_f32(nx, &beta, &lm_y[j[0]*nx], d); + beta /= lm_ys[j[0]]; + // \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j} + ggml_vec_mad_f32(nx, d, &lm_s[j[0]*nx], lm_alpha[j[0]] - beta); + j[0] = (j[0] + 1)%m; + } + + step[0] = 1.0; + } + + return GGML_OPT_DID_NOT_CONVERGE; +} + +struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) { + struct ggml_opt_params result; + + switch (type) { + case GGML_OPT_ADAM: + { + result = (struct ggml_opt_params) { + .type = GGML_OPT_ADAM, + .n_threads = 1, + .past = 0, + .delta = 1e-5f, + + .max_no_improvement = 100, + + .print_forward_graph = true, + .print_backward_graph = true, + + .adam = { + .n_iter = 10000, + .sched = 1.000f, + .decay = 0.001f, + .alpha = 0.001f, + .beta1 = 0.9f, + .beta2 = 0.999f, + .eps = 1e-8f, + .eps_f = 1e-5f, + .eps_g = 1e-3f, + }, + }; + } break; + case GGML_OPT_LBFGS: + { + result = (struct ggml_opt_params) { + .type = GGML_OPT_LBFGS, + .n_threads = 1, + .past = 0, + .delta = 1e-5f, + + .max_no_improvement = 0, + + .print_forward_graph = true, + .print_backward_graph = true, + + .lbfgs = { + .m = 6, + .n_iter = 100, + .max_linesearch = 20, + + .eps = 1e-5f, + .ftol = 1e-4f, + .wolfe = 0.9f, + .min_step = 1e-20f, + .max_step = 1e+20f, + + .linesearch = GGML_LINESEARCH_DEFAULT, + }, + }; + } break; + } + + return result; +} + +GGML_API void ggml_opt_init( + struct ggml_context * ctx, + struct ggml_opt_context * opt, + struct ggml_opt_params params, + int64_t nx) { + opt->ctx = ctx; + opt->params = params; + opt->iter = 0; + opt->nx = nx; + opt->just_initialized = true; + switch (opt->params.type) { + case GGML_OPT_ADAM: + { + opt->adam.x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->adam.g1 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->adam.g2 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->adam.m = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->adam.v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->adam.mh = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->adam.vh = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->adam.pf = params.past > 0 + ? ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.past) + : NULL; + ggml_set_zero(opt->adam.x); + ggml_set_zero(opt->adam.g1); + ggml_set_zero(opt->adam.g2); + ggml_set_zero(opt->adam.m); + ggml_set_zero(opt->adam.v); + ggml_set_zero(opt->adam.mh); + ggml_set_zero(opt->adam.vh); + if (opt->adam.pf) { + ggml_set_zero(opt->adam.pf); + } + } break; + case GGML_OPT_LBFGS: + { + opt->lbfgs.x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->lbfgs.xp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->lbfgs.g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->lbfgs.gp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->lbfgs.d = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); + opt->lbfgs.pf = params.past > 0 + ? ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.past) + : NULL; + opt->lbfgs.lmal = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.lbfgs.m); + opt->lbfgs.lmys = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.lbfgs.m); + opt->lbfgs.lms = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, params.lbfgs.m); + opt->lbfgs.lmy = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, params.lbfgs.m); + ggml_set_zero(opt->lbfgs.x); + ggml_set_zero(opt->lbfgs.xp); + ggml_set_zero(opt->lbfgs.g); + ggml_set_zero(opt->lbfgs.gp); + ggml_set_zero(opt->lbfgs.d); + ggml_set_zero(opt->lbfgs.pf); + if (opt->lbfgs.pf) { + ggml_set_zero(opt->lbfgs.pf); + } + ggml_set_zero(opt->lbfgs.lmal); + ggml_set_zero(opt->lbfgs.lmys); + ggml_set_zero(opt->lbfgs.lms); + ggml_set_zero(opt->lbfgs.lmy); + } break; + } +} + +enum ggml_opt_result ggml_opt( + struct ggml_context * ctx, + struct ggml_opt_params params, + struct ggml_tensor * f) { + bool free_ctx = false; + if (ctx == NULL) { + struct ggml_init_params params_ctx = { + .mem_size = 16*1024*1024, + .mem_buffer = NULL, + .no_alloc = false, + }; + + ctx = ggml_init(params_ctx); + if (ctx == NULL) { + return GGML_OPT_NO_CONTEXT; + } + + free_ctx = true; + } + + enum ggml_opt_result result = GGML_OPT_OK; + + struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context)); + + ggml_opt_init(ctx, opt, params, 0); + result = ggml_opt_resume(ctx, opt, f); + + if (free_ctx) { + ggml_free(ctx); + } + + return result; +} + +enum ggml_opt_result ggml_opt_resume( + struct ggml_context * ctx, + struct ggml_opt_context * opt, + struct ggml_tensor * f) { + + // build forward + backward compute graphs + struct ggml_tensor * gfbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(struct ggml_cgraph) / GGML_TYPE_SIZE[GGML_TYPE_I32]+ (sizeof(struct ggml_cgraph) % GGML_TYPE_SIZE[GGML_TYPE_I32] ? 1 : 0)); + struct ggml_tensor * gbbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(struct ggml_cgraph) / GGML_TYPE_SIZE[GGML_TYPE_I32]+ (sizeof(struct ggml_cgraph) % GGML_TYPE_SIZE[GGML_TYPE_I32] ? 1 : 0)); + + struct ggml_cgraph * gf = (struct ggml_cgraph *) gfbuf->data; + struct ggml_cgraph * gb = (struct ggml_cgraph *) gbbuf->data; + + *gf = ggml_build_forward (f); + *gb = ggml_build_backward(ctx, gf, true); + + return ggml_opt_resume_g(ctx, opt, f, gf, gb); +} + +enum ggml_opt_result ggml_opt_resume_g( + struct ggml_context * ctx, + struct ggml_opt_context * opt, + struct ggml_tensor * f, + struct ggml_cgraph * gf, + struct ggml_cgraph * gb) { + + // build forward + backward compute graphs + enum ggml_opt_result result = GGML_OPT_OK; + + switch (opt->params.type) { + case GGML_OPT_ADAM: + { + result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb); + } break; + case GGML_OPT_LBFGS: + { + result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb); + } break; + } + + if (opt->params.print_forward_graph) { + ggml_graph_print (gf); + ggml_graph_dump_dot(gf, NULL, "opt-forward.dot"); + } + + if (opt->params.print_backward_graph) { + ggml_graph_print (gb); + ggml_graph_dump_dot(gb, gf, "opt-backward.dot"); + } + + return result; +} + +//////////////////////////////////////////////////////////////////////////////// + +size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist) { + assert(k % QK4_0 == 0); + const int nb = k / QK4_0; + + for (int b = 0; b < n; b += k) { + block_q4_0 * restrict y = (block_q4_0 *) dst + b/QK4_0; + + quantize_row_q4_0_reference(src + b, y, k); + + for (int i = 0; i < nb; i++) { + for (int j = 0; j < QK4_0; j += 2) { + const uint8_t vi0 = y[i].qs[j/2] & 0x0F; + const uint8_t vi1 = y[i].qs[j/2] >> 4; + + hist[vi0]++; + hist[vi1]++; + } + } + } + + return (n/QK4_0*sizeof(block_q4_0)); +} + +size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist) { + assert(k % QK4_1 == 0); + const int nb = k / QK4_1; + + for (int b = 0; b < n; b += k) { + block_q4_1 * restrict y = (block_q4_1 *) dst + b/QK4_1; + + quantize_row_q4_1_reference(src + b, y, k); + + for (int i = 0; i < nb; i++) { + for (int j = 0; j < QK4_1; j += 2) { + const uint8_t vi0 = y[i].qs[j/2] & 0x0F; + const uint8_t vi1 = y[i].qs[j/2] >> 4; + + hist[vi0]++; + hist[vi1]++; + } + } + } + + return (n/QK4_1*sizeof(block_q4_1)); +} + +size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist) { + assert(k % QK5_0 == 0); + const int nb = k / QK5_0; + + for (int b = 0; b < n; b += k) { + block_q5_0 * restrict y = (block_q5_0 *)dst + b/QK5_0; + + quantize_row_q5_0_reference(src + b, y, k); + + for (int i = 0; i < nb; i++) { + uint32_t qh; + memcpy(&qh, &y[i].qh, sizeof(qh)); + + for (int j = 0; j < QK5_0; j += 2) { + const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4; + const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12)); + + // cast to 16 bins + const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2; + const uint8_t vi1 = ((y[i].qs[j/2] >> 4) | vh1) / 2; + + hist[vi0]++; + hist[vi1]++; + } + } + } + + return (n/QK5_0*sizeof(block_q5_0)); +} + +size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist) { + assert(k % QK5_1 == 0); + const int nb = k / QK5_1; + + for (int b = 0; b < n; b += k) { + block_q5_1 * restrict y = (block_q5_1 *)dst + b/QK5_1; + + quantize_row_q5_1_reference(src + b, y, k); + + for (int i = 0; i < nb; i++) { + uint32_t qh; + memcpy(&qh, &y[i].qh, sizeof(qh)); + + for (int j = 0; j < QK5_1; j += 2) { + const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4; + const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12)); + + // cast to 16 bins + const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2; + const uint8_t vi1 = ((y[i].qs[j/2] >> 4) | vh1) / 2; + + hist[vi0]++; + hist[vi1]++; + } + } + } + + return (n/QK5_1*sizeof(block_q5_1)); +} + +size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist) { + assert(k % QK8_0 == 0); + const int nb = k / QK8_0; + + for (int b = 0; b < n; b += k) { + block_q8_0 * restrict y = (block_q8_0 *)dst + b/QK8_0; + + quantize_row_q8_0_reference(src + b, y, k); + + for (int i = 0; i < nb; i++) { + for (int j = 0; j < QK8_0; ++j) { + const int8_t vi = y[i].qs[j]; + + hist[vi/16 + 8]++; + } + } + } + + return (n/QK8_0*sizeof(block_q8_0)); +} + +size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist) { + size_t result = 0; + switch (type) { + case GGML_TYPE_Q4_0: + { + GGML_ASSERT(start % QK4_0 == 0); + block_q4_0 * block = (block_q4_0*)dst + start / QK4_0; + result = ggml_quantize_q4_0(src + start, block, n, n, hist); + } break; + case GGML_TYPE_Q4_1: + { + GGML_ASSERT(start % QK4_1 == 0); + block_q4_1 * block = (block_q4_1*)dst + start / QK4_1; + result = ggml_quantize_q4_1(src + start, block, n, n, hist); + } break; + case GGML_TYPE_Q5_0: + { + GGML_ASSERT(start % QK5_0 == 0); + block_q5_0 * block = (block_q5_0*)dst + start / QK5_0; + result = ggml_quantize_q5_0(src + start, block, n, n, hist); + } break; + case GGML_TYPE_Q5_1: + { + GGML_ASSERT(start % QK5_1 == 0); + block_q5_1 * block = (block_q5_1*)dst + start / QK5_1; + result = ggml_quantize_q5_1(src + start, block, n, n, hist); + } break; + case GGML_TYPE_Q8_0: + { + GGML_ASSERT(start % QK8_0 == 0); + block_q8_0 * block = (block_q8_0*)dst + start / QK8_0; + result = ggml_quantize_q8_0(src + start, block, n, n, hist); + } break; +#ifdef GGML_USE_K_QUANTS + case GGML_TYPE_Q2_K: + { + GGML_ASSERT(start % QK_K == 0); + block_q2_K * block = (block_q2_K*)dst + start / QK_K; + result = ggml_quantize_q2_K(src + start, block, n, n, hist); + } break; + case GGML_TYPE_Q3_K: + { + GGML_ASSERT(start % QK_K == 0); + block_q3_K * block = (block_q3_K*)dst + start / QK_K; + result = ggml_quantize_q3_K(src + start, block, n, n, hist); + } break; + case GGML_TYPE_Q4_K: + { + GGML_ASSERT(start % QK_K == 0); + block_q4_K * block = (block_q4_K*)dst + start / QK_K; + result = ggml_quantize_q4_K(src + start, block, n, n, hist); + } break; + case GGML_TYPE_Q5_K: + { + GGML_ASSERT(start % QK_K == 0); + block_q5_K * block = (block_q5_K*)dst + start / QK_K; + result = ggml_quantize_q5_K(src + start, block, n, n, hist); + } break; + case GGML_TYPE_Q6_K: + { + GGML_ASSERT(start % QK_K == 0); + block_q6_K * block = (block_q6_K*)dst + start / QK_K; + result = ggml_quantize_q6_K(src + start, block, n, n, hist); + } break; +#endif + case GGML_TYPE_F16: + { + int elemsize = sizeof(ggml_fp16_t); + ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n); + result = n * elemsize; + } break; + case GGML_TYPE_F32: + { + int elemsize = sizeof(float); + result = n * elemsize; + memcpy((uint8_t *)dst + start * elemsize, src + start, result); + } break; + default: + assert(false); + } + return result; +} + +//////////////////////////////////////////////////////////////////////////////// + +int ggml_cpu_has_avx(void) { +#if defined(__AVX__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_avx2(void) { +#if defined(__AVX2__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_avx512(void) { +#if defined(__AVX512F__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_avx512_vbmi(void) { +#if defined(__AVX512VBMI__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_avx512_vnni(void) { +#if defined(__AVX512VNNI__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_fma(void) { +#if defined(__FMA__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_neon(void) { +#if defined(__ARM_NEON) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_arm_fma(void) { +#if defined(__ARM_FEATURE_FMA) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_f16c(void) { +#if defined(__F16C__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_fp16_va(void) { +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_wasm_simd(void) { +#if defined(__wasm_simd128__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_blas(void) { +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_cublas(void) { +#if defined(GGML_USE_CUBLAS) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_clblast(void) { +#if defined(GGML_USE_CLBLAST) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_gpublas(void) { + return ggml_cpu_has_cublas() || ggml_cpu_has_clblast(); +} + +int ggml_cpu_has_sse3(void) { +#if defined(__SSE3__) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_vsx(void) { +#if defined(__POWER9_VECTOR__) + return 1; +#else + return 0; +#endif +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/ctransformers/models/ggml/ggml.h b/ctransformers/models/ggml/ggml.h new file mode 100644 index 0000000000000000000000000000000000000000..b19b5900c395ee2d115f5f94fa622def0f84336d --- /dev/null +++ b/ctransformers/models/ggml/ggml.h @@ -0,0 +1,1323 @@ +#pragma once + +// +// GGML Tensor Library +// +// This documentation is still a work in progress. +// If you wish some specific topics to be covered, feel free to drop a comment: +// +// https://github.com/ggerganov/whisper.cpp/issues/40 +// +// ## Overview +// +// This library implements: +// +// - a set of tensor operations +// - automatic differentiation +// - basic optimization algorithms +// +// The aim of this library is to provide a minimalistic approach for various machine learning tasks. This includes, +// but is not limited to, the following: +// +// - linear regression +// - support vector machines +// - neural networks +// +// The library allows the user to define a certain function using the available tensor operations. This function +// definition is represented internally via a computation graph. Each tensor operation in the function definition +// corresponds to a node in the graph. Having the computation graph defined, the user can choose to compute the +// function's value and/or its gradient with respect to the input variables. Optionally, the function can be optimized +// using one of the available optimization algorithms. +// +// For example, here we define the function: f(x) = a*x^2 + b +// +// { +// struct ggml_init_params params = { +// .mem_size = 16*1024*1024, +// .mem_buffer = NULL, +// }; +// +// // memory allocation happens here +// struct ggml_context * ctx = ggml_init(params); +// +// struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); +// +// ggml_set_param(ctx, x); // x is an input variable +// +// struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); +// struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); +// struct ggml_tensor * x2 = ggml_mul(ctx, x, x); +// struct ggml_tensor * f = ggml_add(ctx, ggml_mul(ctx, a, x2), b); +// +// ... +// } +// +// Notice that the function definition above does not involve any actual computation. The computation is performed only +// when the user explicitly requests it. For example, to compute the function's value at x = 2.0: +// +// { +// ... +// +// struct ggml_cgraph gf = ggml_build_forward(f); +// +// // set the input variable and parameter values +// ggml_set_f32(x, 2.0f); +// ggml_set_f32(a, 3.0f); +// ggml_set_f32(b, 4.0f); +// +// ggml_graph_compute(ctx0, &gf); +// +// printf("f = %f\n", ggml_get_f32_1d(f, 0)); +// +// ... +// } +// +// The actual computation is performed in the ggml_graph_compute() function. +// +// The ggml_new_tensor_...() functions create new tensors. They are allocated in the memory buffer provided to the +// ggml_init() function. You have to be careful not to exceed the memory buffer size. Therefore, you have to know +// in advance how much memory you need for your computation. Alternatively, you can allocate a large enough memory +// and after defining the computation graph, call the ggml_used_mem() function to find out how much memory was +// actually needed. +// +// The ggml_set_param() function marks a tensor as an input variable. This is used by the automatic +// differentiation and optimization algorithms. +// +// The described approach allows to define the function graph once and then compute its forward or backward graphs +// multiple times. All computations will use the same memory buffer allocated in the ggml_init() function. This way +// the user can avoid the memory allocation overhead at runtime. +// +// The library supports multi-dimensional tensors - up to 4 dimensions. The FP16 and FP32 data types are first class +// citizens, but in theory the library can be extended to support FP8 and integer data types. +// +// Each tensor operation produces a new tensor. Initially the library was envisioned to support only the use of unary +// and binary operations. Most of the available operations fall into one of these two categories. With time, it became +// clear that the library needs to support more complex operations. The way to support these operations is not clear +// yet, but a few examples are demonstrated in the following operations: +// +// - ggml_permute() +// - ggml_conv_1d_1s() +// - ggml_conv_1d_2s() +// +// For each tensor operator, the library implements a forward and backward computation function. The forward function +// computes the output tensor value given the input tensor values. The backward function computes the adjoint of the +// input tensors given the adjoint of the output tensor. For a detailed explanation of what this means, take a +// calculus class, or watch the following video: +// +// What is Automatic Differentiation? +// https://www.youtube.com/watch?v=wG_nF1awSSY +// +// +// ## Tensor data (struct ggml_tensor) +// +// The tensors are stored in memory via the ggml_tensor struct. The structure provides information about the size of +// the tensor, the data type, and the memory buffer where the tensor data is stored. Additionally, it contains +// pointers to the "source" tensors - i.e. the tensors that were used to compute the current tensor. For example: +// +// { +// struct ggml_tensor * c = ggml_add(ctx, a, b); +// +// assert(c->src[0] == a); +// assert(c->src[1] == b); +// } +// +// The multi-dimensional tensors are stored in row-major order. The ggml_tensor struct contains fields for the +// number of elements in each dimension ("ne") as well as the number of bytes ("nb", a.k.a. stride). This allows +// to store tensors that are not contiguous in memory, which is useful for operations such as transposition and +// permutation. All tensor operations have to take the stride into account and not assume that the tensor is +// contiguous in memory. +// +// The data of the tensor is accessed via the "data" pointer. For example: +// +// { +// struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3); +// +// // a[1, 2] = 1.0f; +// *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f; +// +// // a[2, 0] = 2.0f; +// *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f; +// +// ... +// } +// +// Alternatively, there are helper functions, such as ggml_get_f32_1d() and ggml_set_f32_1d() that can be used. +// +// ## The matrix multiplication operator (ggml_mul_mat) +// +// TODO +// +// +// ## Multi-threading +// +// TODO +// +// +// ## Overview of ggml.c +// +// TODO +// +// +// ## SIMD optimizations +// +// TODO +// +// +// ## Debugging ggml +// +// TODO +// +// + +#ifdef GGML_SHARED +# if defined(_WIN32) && !defined(__MINGW32__) +# ifdef GGML_BUILD +# define GGML_API __declspec(dllexport) +# else +# define GGML_API __declspec(dllimport) +# endif +# else +# define GGML_API __attribute__ ((visibility ("default"))) +# endif +#else +# define GGML_API +#endif + +#include +#include +#include + +#define GGML_FILE_MAGIC 0x67676d6c // "ggml" +#define GGML_FILE_VERSION 1 + +#define GGML_QNT_VERSION 2 // bump this on quantization format changes +#define GGML_QNT_VERSION_FACTOR 1000 // do not change this + +#define GGML_MAX_DIMS 4 +#define GGML_MAX_NODES 4096 +#define GGML_MAX_PARAMS 256 +#define GGML_MAX_CONTEXTS 64 +#define GGML_MAX_OPT 4 +#define GGML_MAX_NAME 32 +#define GGML_DEFAULT_N_THREADS 4 + +#define GGML_ASSERT(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \ + abort(); \ + } \ + } while (0) + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __ARM_NEON + // we use the built-in 16-bit float type + typedef __fp16 ggml_fp16_t; +#else + typedef uint16_t ggml_fp16_t; +#endif + + // convert FP16 <-> FP32 + GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x); + GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x); + + GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n); + GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n); + + struct ggml_object; + struct ggml_context; + + enum ggml_type { + GGML_TYPE_F32 = 0, + GGML_TYPE_F16 = 1, + GGML_TYPE_Q4_0 = 2, + GGML_TYPE_Q4_1 = 3, + // GGML_TYPE_Q4_2 = 4, support has been removed + // GGML_TYPE_Q4_3 (5) support has been removed + GGML_TYPE_Q5_0 = 6, + GGML_TYPE_Q5_1 = 7, + GGML_TYPE_Q8_0 = 8, + GGML_TYPE_Q8_1 = 9, + // k-quantizations + GGML_TYPE_Q2_K = 10, + GGML_TYPE_Q3_K = 11, + GGML_TYPE_Q4_K = 12, + GGML_TYPE_Q5_K = 13, + GGML_TYPE_Q6_K = 14, + GGML_TYPE_Q8_K = 15, + GGML_TYPE_I8, + GGML_TYPE_I16, + GGML_TYPE_I32, + GGML_TYPE_COUNT, + }; + + enum ggml_backend { + GGML_BACKEND_CPU = 0, + GGML_BACKEND_GPU = 10, + GGML_BACKEND_GPU_SPLIT = 20, + }; + + // model file types + enum ggml_ftype { + GGML_FTYPE_UNKNOWN = -1, + GGML_FTYPE_ALL_F32 = 0, + GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors + GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors + GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors + GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16 + GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors + GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors + GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors + GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors + GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors + GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors + GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors + GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors + }; + + // available tensor operations: + enum ggml_op { + GGML_OP_NONE = 0, + + GGML_OP_DUP, + GGML_OP_ADD, + GGML_OP_ADD1, + GGML_OP_ACC, + GGML_OP_SUB, + GGML_OP_MUL, + GGML_OP_DIV, + GGML_OP_SQR, + GGML_OP_SQRT, + GGML_OP_LOG, + GGML_OP_SUM, + GGML_OP_SUM_ROWS, + GGML_OP_MEAN, + GGML_OP_REPEAT, + GGML_OP_REPEAT_BACK, + GGML_OP_REPEAT2, + GGML_OP_ABS, + GGML_OP_SGN, + GGML_OP_NEG, + GGML_OP_STEP, + GGML_OP_RELU, + GGML_OP_GELU, + GGML_OP_SILU, + GGML_OP_SILU_BACK, + GGML_OP_NORM, // normalize + GGML_OP_RMS_NORM, + GGML_OP_RMS_NORM_BACK, + + GGML_OP_MUL_MAT, + GGML_OP_OUT_PROD, + + GGML_OP_SCALE, + GGML_OP_SET, + GGML_OP_CPY, + GGML_OP_CONT, + GGML_OP_RESHAPE, + GGML_OP_VIEW, + GGML_OP_PERMUTE, + GGML_OP_TRANSPOSE, + GGML_OP_GET_ROWS, + GGML_OP_GET_ROWS_BACK, + GGML_OP_DIAG, + GGML_OP_DIAG_MASK_INF, + GGML_OP_DIAG_MASK_ZERO, + GGML_OP_SOFT_MAX, + GGML_OP_SOFT_MAX_BACK, + GGML_OP_ROPE, + GGML_OP_ROPE_BACK, + GGML_OP_ALIBI, + GGML_OP_CLAMP, + GGML_OP_CONV_1D_1S, + GGML_OP_CONV_1D_2S, + + GGML_OP_FLASH_ATTN, + GGML_OP_FLASH_FF, + GGML_OP_FLASH_ATTN_BACK, + + GGML_OP_MAP_UNARY, + GGML_OP_MAP_BINARY, + + GGML_OP_CROSS_ENTROPY_LOSS, + GGML_OP_CROSS_ENTROPY_LOSS_BACK, + + GGML_OP_COUNT, + }; + + + // ggml object + struct ggml_object { + size_t offs; + size_t size; + + struct ggml_object * next; + + char padding[8]; + }; + + static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object); + + // n-dimensional tensor + struct ggml_tensor { + enum ggml_type type; + enum ggml_backend backend; + + int n_dims; + int64_t ne[GGML_MAX_DIMS]; // number of elements + size_t nb[GGML_MAX_DIMS]; // stride in bytes: + // nb[0] = sizeof(type) + // nb[1] = nb[0] * ne[0] + padding + // nb[i] = nb[i-1] * ne[i-1] + + // compute data + enum ggml_op op; + + bool is_param; + + struct ggml_tensor * grad; + struct ggml_tensor * src0; + struct ggml_tensor * src1; + struct ggml_tensor * opt[GGML_MAX_OPT]; + + // thread scheduling + int n_tasks; + + // performance + int perf_runs; + int64_t perf_cycles; + int64_t perf_time_us; + + void * data; + + char name[GGML_MAX_NAME]; + + void * extra; // extra things e.g. for ggml-cuda.cu + + char padding[4]; + }; + + static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor); + + // computation graph + struct ggml_cgraph { + int n_nodes; + int n_leafs; + int n_threads; + + size_t work_size; + struct ggml_tensor * work; + + struct ggml_tensor * nodes[GGML_MAX_NODES]; + struct ggml_tensor * grads[GGML_MAX_NODES]; + struct ggml_tensor * leafs[GGML_MAX_NODES]; + + // performance + int perf_runs; + int64_t perf_cycles; + int64_t perf_time_us; + }; + + // scratch buffer + struct ggml_scratch { + size_t offs; + size_t size; + void * data; + }; + + struct ggml_init_params { + // memory pool + size_t mem_size; // bytes + void * mem_buffer; // if NULL, memory will be allocated internally + bool no_alloc; // don't allocate memory for the tensor data + }; + + + // compute types + enum ggml_task_type { + GGML_TASK_INIT = 0, + GGML_TASK_COMPUTE, + GGML_TASK_FINALIZE, + }; + + struct ggml_compute_params { + enum ggml_task_type type; + + // ith = thread index, nth = number of threads + int ith, nth; + + // work buffer for all threads + size_t wsize; + void * wdata; + }; + + // misc + + GGML_API void ggml_time_init(void); // call this once at the beginning of the program + GGML_API int64_t ggml_time_ms(void); + GGML_API int64_t ggml_time_us(void); + GGML_API int64_t ggml_cycles(void); + GGML_API int64_t ggml_cycles_per_ms(void); + + GGML_API void ggml_print_object (const struct ggml_object * obj); + GGML_API void ggml_print_objects(const struct ggml_context * ctx); + + GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor); + GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor); + GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor); + GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split); + + GGML_API int ggml_blck_size (enum ggml_type type); + GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block + GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float + + GGML_API const char * ggml_type_name(enum ggml_type type); + GGML_API const char * ggml_op_name (enum ggml_op op); + + GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor); + + GGML_API bool ggml_is_quantized(enum ggml_type type); + + // TODO: temporary until model loading of ggml examples is refactored + GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype); + + GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor); + GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor); + GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor); + + // use this to compute the memory overhead of a tensor + GGML_API size_t ggml_tensor_overhead(void); + + // main + + GGML_API struct ggml_context * ggml_init(struct ggml_init_params params); + GGML_API void ggml_free(struct ggml_context * ctx); + + GGML_API size_t ggml_used_mem(const struct ggml_context * ctx); + + GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch); + GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc); + + GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx); + GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx); + GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx); + + GGML_API struct ggml_tensor * ggml_new_tensor( + struct ggml_context * ctx, + enum ggml_type type, + int n_dims, + const int64_t *ne); + + GGML_API struct ggml_tensor * ggml_new_tensor_1d( + struct ggml_context * ctx, + enum ggml_type type, + int64_t ne0); + + GGML_API struct ggml_tensor * ggml_new_tensor_2d( + struct ggml_context * ctx, + enum ggml_type type, + int64_t ne0, + int64_t ne1); + + GGML_API struct ggml_tensor * ggml_new_tensor_3d( + struct ggml_context * ctx, + enum ggml_type type, + int64_t ne0, + int64_t ne1, + int64_t ne2); + + GGML_API struct ggml_tensor * ggml_new_tensor_4d( + struct ggml_context * ctx, + enum ggml_type type, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3); + + GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value); + GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value); + + GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src); + GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src); + + GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name); + + GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor); + GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value); + GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value); + + GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i); + GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value); + + GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i); + GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value); + + GGML_API void * ggml_get_data (const struct ggml_tensor * tensor); + GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor); + + GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor); + GGML_API void ggml_set_name(struct ggml_tensor * tensor, const char * name); + + // + // operations on tensors with backpropagation + // + + GGML_API struct ggml_tensor * ggml_dup( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_add( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_add_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_add1( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_add1_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_acc( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset); + + GGML_API struct ggml_tensor * ggml_acc_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset); + + GGML_API struct ggml_tensor * ggml_sub( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_mul( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_div( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_sqr( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_sqrt( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_log( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_log_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // return scalar + GGML_API struct ggml_tensor * ggml_sum( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d] + GGML_API struct ggml_tensor * ggml_sum_rows( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // mean along rows + GGML_API struct ggml_tensor * ggml_mean( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // if a is the same shape as b, and a is not parameter, return a + // otherwise, return a new tensor: repeat(a) to fit in b + GGML_API struct ggml_tensor * ggml_repeat( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_repeat_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + #include "ggml/ggml-ggllm.h" + + GGML_API struct ggml_tensor * ggml_abs( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_sgn( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_neg( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_step( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_relu( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // TODO: double-check this computation is correct + GGML_API struct ggml_tensor * ggml_gelu( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_silu( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // a - x + // b - dy + GGML_API struct ggml_tensor * ggml_silu_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // normalize along rows + // TODO: eps is hardcoded to 1e-5 for now + GGML_API struct ggml_tensor * ggml_norm( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_rms_norm( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // a - x + // b - dy + GGML_API struct ggml_tensor * ggml_rms_norm_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // A: n columns, m rows + // B: n columns, p rows (i.e. we transpose it internally) + // result is m columns, p rows + GGML_API struct ggml_tensor * ggml_mul_mat( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // A: m columns, n rows, + // B: p columns, n rows, + // result is m columns, p rows + GGML_API struct ggml_tensor * ggml_out_prod( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // + // operations on tensors without backpropagation + // + + GGML_API struct ggml_tensor * ggml_scale( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // in-place, returns view(a) + GGML_API struct ggml_tensor * ggml_scale_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // b -> view(a,offset,nb1,nb2,3), return modified a + GGML_API struct ggml_tensor * ggml_set( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset); + + // b -> view(a,offset,nb1,nb2,3), return view(a) + GGML_API struct ggml_tensor * ggml_set_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t nb2, + size_t nb3, + size_t offset); + + GGML_API struct ggml_tensor * ggml_set_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t offset); + + GGML_API struct ggml_tensor * ggml_set_1d_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t offset); + + // b -> view(a,offset,nb1,nb2,3), return modified a + GGML_API struct ggml_tensor * ggml_set_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t offset); + + // b -> view(a,offset,nb1,nb2,3), return view(a) + GGML_API struct ggml_tensor * ggml_set_2d_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + size_t nb1, + size_t offset); + + + // a -> b, return view(b) + GGML_API struct ggml_tensor * ggml_cpy( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // make contiguous + GGML_API struct ggml_tensor * ggml_cont( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // return view(a), b specifies the new shape + // TODO: when we start computing gradient, make a copy instead of view + GGML_API struct ggml_tensor * ggml_reshape( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // return view(a) + // TODO: when we start computing gradient, make a copy instead of view + GGML_API struct ggml_tensor * ggml_reshape_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0); + + GGML_API struct ggml_tensor * ggml_reshape_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1); + + // return view(a) + // TODO: when we start computing gradient, make a copy instead of view + GGML_API struct ggml_tensor * ggml_reshape_3d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2); + + GGML_API struct ggml_tensor * ggml_reshape_4d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3); + + // offset in bytes + GGML_API struct ggml_tensor * ggml_view_1d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + size_t offset); + + GGML_API struct ggml_tensor * ggml_view_2d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + size_t nb1, // row stride in bytes + size_t offset); + + GGML_API struct ggml_tensor * ggml_view_3d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + size_t nb1, // row stride in bytes + size_t nb2, // slice stride in bytes + size_t offset); + + GGML_API struct ggml_tensor * ggml_view_4d( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + size_t nb1, // row stride in bytes + size_t nb2, // slice stride in bytes + size_t nb3, + size_t offset); + + GGML_API struct ggml_tensor * ggml_permute( + struct ggml_context * ctx, + struct ggml_tensor * a, + int axis0, + int axis1, + int axis2, + int axis3); + + // alias for ggml_permute(ctx, a, 1, 0, 2, 3) + GGML_API struct ggml_tensor * ggml_transpose( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_get_rows( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_get_rows_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c); + + GGML_API struct ggml_tensor * ggml_diag( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // set elements above the diagonal to -INF + GGML_API struct ggml_tensor * ggml_diag_mask_inf( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past); + + // in-place, returns view(a) + GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past); + + // set elements above the diagonal to 0 + GGML_API struct ggml_tensor * ggml_diag_mask_zero( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past); + + // in-place, returns view(a) + GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past); + + GGML_API struct ggml_tensor * ggml_soft_max( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // in-place, returns view(a) + GGML_API struct ggml_tensor * ggml_soft_max_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_soft_max_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // in-place, returns view(a) + GGML_API struct ggml_tensor * ggml_soft_max_back_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + // rotary position embedding + // if mode & 1 == 1, skip n_past elements + // if mode & 2 == 1, GPT-NeoX style + // TODO: avoid creating a new tensor every time + GGML_API struct ggml_tensor * ggml_rope( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_dims, + int mode); + + // in-place, returns view(a) + GGML_API struct ggml_tensor * ggml_rope_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_dims, + int mode); + + // rotary position embedding backward, i.e compute dx from dy + // a - dy + GGML_API struct ggml_tensor * ggml_rope_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_dims, + int mode); + + // alibi position embedding + // in-place, returns view(a) + struct ggml_tensor * ggml_alibi( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_head, + float bias_max); + + // clamp + // in-place, returns view(a) + struct ggml_tensor * ggml_clamp( + struct ggml_context * ctx, + struct ggml_tensor * a, + float min, + float max); + + // padding = 1 + // TODO: we don't support extra parameters for now + // that's why we are hard-coding the stride, padding, and dilation + // not great .. + GGML_API struct ggml_tensor * ggml_conv_1d_1s( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_conv_1d_2s( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_flash_attn( + struct ggml_context * ctx, + struct ggml_tensor * q, + struct ggml_tensor * k, + struct ggml_tensor * v, + bool masked); + + GGML_API struct ggml_tensor * ggml_flash_attn_back( + struct ggml_context * ctx, + struct ggml_tensor * q, + struct ggml_tensor * k, + struct ggml_tensor * v, + struct ggml_tensor * d, + bool masked); + + GGML_API struct ggml_tensor * ggml_flash_ff( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b0, + struct ggml_tensor * b1, + struct ggml_tensor * c0, + struct ggml_tensor * c1); + + // Mapping operations + typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *); + typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *); + + GGML_API struct ggml_tensor * ggml_map_unary_f32( + struct ggml_context * ctx, + struct ggml_tensor * a, + ggml_unary_op_f32_t fun); + + GGML_API struct ggml_tensor * ggml_map_binary_f32( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + ggml_binary_op_f32_t fun); + + // loss function + + GGML_API struct ggml_tensor * ggml_cross_entropy_loss( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c); + + // + // automatic differentiation + // + + GGML_API void ggml_set_param( + struct ggml_context * ctx, + struct ggml_tensor * tensor); + + GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor); + + GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor); + GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep); + + GGML_API void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph); + GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); + + GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name); + + GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname); + GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval); + + // print info and performance information for the graph + GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph); + + // dump the graph into a file using the dot format + GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename); + + // + // optimization + // + + // optimization methods + enum ggml_opt_type { + GGML_OPT_ADAM, + GGML_OPT_LBFGS, + }; + + // linesearch methods + enum ggml_linesearch { + GGML_LINESEARCH_DEFAULT = 1, + + GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0, + GGML_LINESEARCH_BACKTRACKING_WOLFE = 1, + GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2, + }; + + // optimization return values + enum ggml_opt_result { + GGML_OPT_OK = 0, + GGML_OPT_DID_NOT_CONVERGE, + GGML_OPT_NO_CONTEXT, + GGML_OPT_INVALID_WOLFE, + GGML_OPT_FAIL, + + GGML_LINESEARCH_FAIL = -128, + GGML_LINESEARCH_MINIMUM_STEP, + GGML_LINESEARCH_MAXIMUM_STEP, + GGML_LINESEARCH_MAXIMUM_ITERATIONS, + GGML_LINESEARCH_INVALID_PARAMETERS, + }; + + // optimization parameters + // + // see ggml.c (ggml_opt_default_params) for default values + // + struct ggml_opt_params { + enum ggml_opt_type type; + + int n_threads; + + // delta-based convergence test + // + // if past == 0 - disabled + // if past > 0: + // stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|) + // + int past; + float delta; + + // maximum number of iterations without improvement + // + // if 0 - disabled + // if > 0: + // assume convergence if no cost improvement in this number of iterations + // + int max_no_improvement; + + bool print_forward_graph; + bool print_backward_graph; + + // ADAM parameters + struct { + int n_iter; + + float sched; // schedule multiplier (fixed, decay or warmup) + float decay; // weight decay for AdamW, use 0.0f to disable + float alpha; // learning rate + float beta1; + float beta2; + float eps; // epsilon for numerical stability + float eps_f; // epsilon for convergence test + float eps_g; // epsilon for convergence test + } adam; + + // LBFGS parameters + struct { + int m; // number of corrections to approximate the inv. Hessian + int n_iter; + int max_linesearch; + + float eps; // convergence tolerance + float ftol; // line search tolerance + float wolfe; + float min_step; + float max_step; + + enum ggml_linesearch linesearch; + } lbfgs; + }; + + struct ggml_opt_context { + struct ggml_context * ctx; + struct ggml_opt_params params; + + int iter; + int64_t nx; // number of parameter elements + + bool just_initialized; + + struct { + struct ggml_tensor * x; // view of the parameters + struct ggml_tensor * g1; // gradient + struct ggml_tensor * g2; // gradient squared + struct ggml_tensor * m; // first moment + struct ggml_tensor * v; // second moment + struct ggml_tensor * mh; // first moment hat + struct ggml_tensor * vh; // second moment hat + struct ggml_tensor * pf; // past function values + float fx_best; + float fx_prev; + int n_no_improvement; + } adam; + + struct { + struct ggml_tensor * x; // current parameters + struct ggml_tensor * xp; // previous parameters + struct ggml_tensor * g; // current gradient + struct ggml_tensor * gp; // previous gradient + struct ggml_tensor * d; // search direction + struct ggml_tensor * pf; // past function values + struct ggml_tensor * lmal; // the L-BFGS memory alpha + struct ggml_tensor * lmys; // the L-BFGS memory ys + struct ggml_tensor * lms; // the L-BFGS memory s + struct ggml_tensor * lmy; // the L-BFGS memory y + float fx_best; + float step; + int j; + int k; + int end; + int n_no_improvement; + } lbfgs; + }; + + GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type); + + // optimize the function defined by the tensor f + GGML_API enum ggml_opt_result ggml_opt( + struct ggml_context * ctx, + struct ggml_opt_params params, + struct ggml_tensor * f); + + // initialize optimizer context + GGML_API void ggml_opt_init( + struct ggml_context * ctx, + struct ggml_opt_context * opt, + struct ggml_opt_params params, + int64_t nx); + + // continue optimizing the function defined by the tensor f + GGML_API enum ggml_opt_result ggml_opt_resume( + struct ggml_context * ctx, + struct ggml_opt_context * opt, + struct ggml_tensor * f); + + // continue optimizing the function defined by the tensor f + GGML_API enum ggml_opt_result ggml_opt_resume_g( + struct ggml_context * ctx, + struct ggml_opt_context * opt, + struct ggml_tensor * f, + struct ggml_cgraph * gf, + struct ggml_cgraph * gb); + + // + // quantization + // + + GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist); + GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist); + GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist); + GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist); + GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist); + + GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist); + + // + // system info + // + + GGML_API int ggml_cpu_has_avx (void); + GGML_API int ggml_cpu_has_avx2 (void); + GGML_API int ggml_cpu_has_avx512 (void); + GGML_API int ggml_cpu_has_avx512_vbmi(void); + GGML_API int ggml_cpu_has_avx512_vnni(void); + GGML_API int ggml_cpu_has_fma (void); + GGML_API int ggml_cpu_has_neon (void); + GGML_API int ggml_cpu_has_arm_fma (void); + GGML_API int ggml_cpu_has_f16c (void); + GGML_API int ggml_cpu_has_fp16_va (void); + GGML_API int ggml_cpu_has_wasm_simd (void); + GGML_API int ggml_cpu_has_blas (void); + GGML_API int ggml_cpu_has_cublas (void); + GGML_API int ggml_cpu_has_clblast (void); + GGML_API int ggml_cpu_has_gpublas (void); + GGML_API int ggml_cpu_has_sse3 (void); + GGML_API int ggml_cpu_has_vsx (void); + + // + // Internal types and functions exposed for tests and benchmarks + // + +#ifdef __cplusplus + // restrict not standard in C++ +#define GGML_RESTRICT +#else +#define GGML_RESTRICT restrict +#endif + typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); + typedef void (*quantize_row_q_t) (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); + typedef void (*vec_dot_q_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y); + + typedef struct { + dequantize_row_q_t dequantize_row_q; + quantize_row_q_t quantize_row_q; + quantize_row_q_t quantize_row_q_reference; + quantize_row_q_t quantize_row_q_dot; + vec_dot_q_t vec_dot_q; + enum ggml_type vec_dot_type; + } quantize_fns_t; + + quantize_fns_t ggml_internal_get_quantize_fn(size_t i); + +#ifdef __cplusplus +} +#endif diff --git a/ctransformers/models/ggml/k_quants.c b/ctransformers/models/ggml/k_quants.c new file mode 100644 index 0000000000000000000000000000000000000000..a48c821710cbb206069a01ad1a48cb65e7dade4f --- /dev/null +++ b/ctransformers/models/ggml/k_quants.c @@ -0,0 +1,2244 @@ +#include "k_quants.h" +#include "ggml.h" + +#include +#include +#include + +#ifdef __ARM_NEON + +// if YCM cannot find , make a symbolic link to it, for example: +// +// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ +// +#include + +#else + +#ifdef __wasm_simd128__ +#include +#else +#ifdef __POWER9_VECTOR__ +#include +#undef bool +#define bool _Bool +#else +#if defined(_MSC_VER) || defined(__MINGW32__) +#include +#else +#if !defined(__riscv) +#include +#endif +#endif +#endif +#endif +#endif + +#undef MIN +#undef MAX +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +// +// 2-6 bit quantization in super-blocks +// + + +// +// ===================== Helper functions +// +static inline int nearest_int(float fval) { + assert(fval <= 4194303.f); + float val = fval + 12582912.f; + int i; memcpy(&i, &val, sizeof(int)); + return (i & 0x007fffff) - 0x00400000; +} + +static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t * restrict L, int rmse_type) { + float max = 0; + float amax = 0; + for (int i = 0; i < n; ++i) { + float ax = fabsf(x[i]); + if (ax > amax) { amax = ax; max = x[i]; } + } + if (!amax) { // all zero + for (int i = 0; i < n; ++i) { + L[i] = 0; + } + return 0.f; + } + float iscale = -nmax / max; + if (rmse_type == 0) { + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale * x[i]); + L[i] = nmax + MAX(-nmax, MIN(nmax-1, l)); + } + return 1/iscale; + } + int weight_type = rmse_type%2; + float sumlx = 0; + float suml2 = 0; + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale * x[i]); + l = MAX(-nmax, MIN(nmax-1, l)); + L[i] = l + nmax; + float w = weight_type == 1 ? x[i] * x[i] : 1; + sumlx += w*x[i]*l; + suml2 += w*l*l; + } + float scale = sumlx/suml2; + float best = scale * sumlx; + for (int itry = 0; itry < 3; ++itry) { + iscale = 1/scale; + float slx = 0; + float sl2 = 0; + bool changed = false; + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale * x[i]); + l = MAX(-nmax, MIN(nmax-1, l)); + if (l + nmax != L[i]) { changed = true; } + float w = weight_type == 1 ? x[i] * x[i] : 1.f; + slx += w*x[i]*l; + sl2 += w*l*l; + } + if (!changed || sl2 == 0 || slx*slx <= best*sl2) { break; } + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale * x[i]); + L[i] = nmax + MAX(-nmax, MIN(nmax-1, l)); + } + sumlx = slx; suml2 = sl2; + scale = sumlx/suml2; + best = scale * sumlx; + } + for (int itry = 0; itry < 5; ++itry) { + int n_changed = 0; + for (int i = 0; i < n; ++i) { + float w = weight_type == 1 ? x[i]*x[i] : 1; + int l = L[i] - nmax; + float slx = sumlx - w*x[i]*l; + if (slx > 0) { + float sl2 = suml2 - w*l*l; + int new_l = nearest_int(x[i] * sl2 / slx); + new_l = MAX(-nmax, MIN(nmax-1, new_l)); + if (new_l != l) { + slx += w*x[i]*new_l; + sl2 += w*new_l*new_l; + if (sl2 > 0 && slx*slx*suml2 > sumlx*sumlx*sl2) { + L[i] = nmax + new_l; sumlx = slx; suml2 = sl2; + scale = sumlx / suml2; best = scale * sumlx; + ++n_changed; + } + } + } + } + if (!n_changed) { break; } + } + if (rmse_type < 3) { + return scale; + } + for (int is = -4; is <= 4; ++is) { + if (is == 0) { + continue; + } + iscale = -(nmax + 0.1f*is) / max; + sumlx = suml2 = 0; + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale * x[i]); + l = MAX(-nmax, MIN(nmax-1, l)); + float w = weight_type == 1 ? x[i] * x[i] : 1; + sumlx += w*x[i]*l; + suml2 += w*l*l; + } + if (suml2 > 0 && sumlx*sumlx > best*suml2) { + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale * x[i]); + L[i] = nmax + MAX(-nmax, MIN(nmax-1, l)); + } + scale = sumlx/suml2; best = scale*sumlx; + } + } + return scale; +} + +static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t * restrict L, bool do_rmse) { + float max = 0; + float amax = 0; + for (int i = 0; i < n; ++i) { + float ax = fabsf(x[i]); + if (ax > amax) { amax = ax; max = x[i]; } + } + if (!amax) { // all zero + for (int i = 0; i < n; ++i) { L[i] = 0; } + return 0.f; + } + float iscale = -nmax / max; + if (do_rmse) { + float sumlx = 0; + float suml2 = 0; + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale * x[i]); + l = MAX(-nmax, MIN(nmax-1, l)); + L[i] = l; + float w = x[i]*x[i]; + sumlx += w*x[i]*l; + suml2 += w*l*l; + } + for (int itry = 0; itry < 5; ++itry) { + int n_changed = 0; + for (int i = 0; i < n; ++i) { + float w = x[i]*x[i]; + float slx = sumlx - w*x[i]*L[i]; + if (slx > 0) { + float sl2 = suml2 - w*L[i]*L[i]; + int new_l = nearest_int(x[i] * sl2 / slx); + new_l = MAX(-nmax, MIN(nmax-1, new_l)); + if (new_l != L[i]) { + slx += w*x[i]*new_l; + sl2 += w*new_l*new_l; + if (sl2 > 0 && slx*slx*suml2 > sumlx*sumlx*sl2) { + L[i] = new_l; sumlx = slx; suml2 = sl2; + ++n_changed; + } + } + } + } + if (!n_changed) { + break; + } + } + for (int i = 0; i < n; ++i) { + L[i] += nmax; + } + return sumlx / suml2; + } + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale * x[i]); + l = MAX(-nmax, MIN(nmax-1, l)); + L[i] = l + nmax; + } + return 1/iscale; +} + +static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t * restrict L, float * restrict the_min, int ntry) { + float min = x[0]; + float max = x[0]; + for (int i = 1; i < n; ++i) { + if (x[i] < min) min = x[i]; + if (x[i] > max) max = x[i]; + } + if (max == min) { + for (int i = 0; i < n; ++i) L[i] = 0; + *the_min = 0; + return 0.f; + } + if (min > 0) min = 0; + float iscale = nmax/(max - min); + float scale = 1/iscale; + for (int itry = 0; itry < ntry; ++itry) { + float sumlx = 0; int suml2 = 0; + bool did_change = false; + for (int i = 0; i < n; ++i) { + int l = nearest_int(iscale*(x[i] - min)); + l = MAX(0, MIN(nmax, l)); + if (l != L[i]) { + L[i] = l; + did_change = true; + } + sumlx += (x[i] - min)*l; + suml2 += l*l; + } + scale = sumlx/suml2; + float sum = 0; + for (int i = 0; i < n; ++i) { + sum += x[i] - scale*L[i]; + } + min = sum/n; + if (min > 0) min = 0; + iscale = 1/scale; + if (!did_change) break; + } + *the_min = -min; + return scale; +} + +static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t * restrict d, uint8_t * restrict m) { + if (j < 4) { + *d = q[j] & 63; *m = q[j + 4] & 63; + } else { + *d = (q[j+4] & 0xF) | ((q[j-4] >> 6) << 4); + *m = (q[j+4] >> 4) | ((q[j-0] >> 6) << 4); + } +} + +//========================- 2-bit (de)-quantization + +void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + uint8_t L[QK_K]; + float mins[QK_K/16]; + float scales[QK_K/16]; + + const float q4scale = 15.f; + + for (int i = 0; i < nb; i++) { + + float max_scale = 0; // as we are deducting the min, scales are always positive + float max_min = 0; + for (int j = 0; j < QK_K/16; ++j) { + scales[j] = make_qkx1_quants(16, 3, x + 16*j, L + 16*j, &mins[j], 5); + float scale = scales[j]; + if (scale > max_scale) { + max_scale = scale; + } + float min = mins[j]; + if (min > max_min) { + max_min = min; + } + } + + if (max_scale > 0) { + float iscale = q4scale/max_scale; + for (int j = 0; j < QK_K/16; ++j) { + int l = nearest_int(iscale*scales[j]); + y[i].scales[j] = l; + } + y[i].d = ggml_fp32_to_fp16(max_scale/q4scale); + } else { + for (int j = 0; j < QK_K/16; ++j) y[i].scales[j] = 0; + y[i].d = ggml_fp32_to_fp16(0.f); + } + if (max_min > 0) { + float iscale = q4scale/max_min; + for (int j = 0; j < QK_K/16; ++j) { + int l = nearest_int(iscale*mins[j]); + y[i].scales[j] |= (l << 4); + } + y[i].dmin = ggml_fp32_to_fp16(max_min/q4scale); + } else { + y[i].dmin = ggml_fp32_to_fp16(0.f); + } + for (int j = 0; j < QK_K/16; ++j) { + const float d = ggml_fp16_to_fp32(y[i].d) * (y[i].scales[j] & 0xF); + if (!d) continue; + const float dm = ggml_fp16_to_fp32(y[i].dmin) * (y[i].scales[j] >> 4); + for (int ii = 0; ii < 16; ++ii) { + int l = nearest_int((x[16*j + ii] + dm)/d); + l = MAX(0, MIN(3, l)); + L[16*j + ii] = l; + } + } + + for (int j = 0; j < QK_K; j += 128) { + for (int l = 0; l < 32; ++l) { + y[i].qs[j/4 + l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6); + } + } + + x += QK_K; + + } +} + +void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + for (int i = 0; i < nb; i++) { + + const float d = ggml_fp16_to_fp32(x[i].d); + const float min = ggml_fp16_to_fp32(x[i].dmin); + + const uint8_t * q = x[i].qs; + + int is = 0; + float dl, ml; + for (int n = 0; n < QK_K; n += 128) { + int shift = 0; + for (int j = 0; j < 4; ++j) { + + uint8_t sc = x[i].scales[is++]; + dl = d * (sc & 0xF); ml = min * (sc >> 4); + for (int l = 0; l < 16; ++l) *y++ = dl * ((int8_t)((q[l] >> shift) & 3)) - ml; + + sc = x[i].scales[is++]; + dl = d * (sc & 0xF); ml = min * (sc >> 4); + for (int l = 0; l < 16; ++l) *y++ = dl * ((int8_t)((q[l+16] >> shift) & 3)) - ml; + + shift += 2; + } + q += 32; + } + + } +} + +void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) { + quantize_row_q2_K_reference(x, vy, k); +} + +size_t ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { + const int nb = k / QK_K; + + // TODO - collect histograms - although, at a second thought, I don't really care about them + (void)hist; + + for (int j = 0; j < nb; j += k) { + block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K; + quantize_row_q2_K_reference(src + j, y, k); + } + return (n/QK_K*sizeof(block_q2_K)); +} + +//========================= 3-bit (de)-quantization + +void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + int8_t L[QK_K]; + float scales[QK_K / 16]; + + for (int i = 0; i < nb; i++) { + + float max_scale = 0; + float amax = 0; + for (int j = 0; j < QK_K/16; ++j) { + scales[j] = make_q3_quants(16, 4, x + 16*j, L + 16*j, true); + float scale = fabsf(scales[j]); + if (scale > amax) { + amax = scale; max_scale = scales[j]; + } + } + + memset(y[i].scales, 0, 12); + if (max_scale) { + float iscale = -32.f/max_scale; + for (int j = 0; j < QK_K/16; ++j) { + int8_t l = nearest_int(iscale*scales[j]); + l = MAX(-32, MIN(31, l)) + 32; + if (j < 8) { + y[i].scales[j] = l & 0xF; + } else { + y[i].scales[j-8] |= ((l & 0xF) << 4); + } + l >>= 4; + y[i].scales[j%4 + 8] |= (l << (2*(j/4))); + } + y[i].d = ggml_fp32_to_fp16(1/iscale); + } else { + y[i].d = ggml_fp32_to_fp16(0.f); + } + + int8_t sc; + for (int j = 0; j < QK_K/16; ++j) { + sc = j < 8 ? y[i].scales[j] & 0xF : y[i].scales[j-8] >> 4; + sc = (sc | (((y[i].scales[8 + j%4] >> (2*(j/4))) & 3) << 4)) - 32; + float d = ggml_fp16_to_fp32(y[i].d) * sc; + if (!d) { + continue; + } + for (int ii = 0; ii < 16; ++ii) { + int l = nearest_int(x[16*j + ii]/d); + l = MAX(-4, MIN(3, l)); + L[16*j + ii] = l + 4; + } + } + + memset(y[i].hmask, 0, QK_K/8); + // We put the high-bit for the 1st 32 quants into bit 0, the next 32 into bit 1, etc. + int m = 0; + uint8_t hm = 1; + for (int j = 0; j < QK_K; ++j) { + if (L[j] > 3) { + y[i].hmask[m] |= hm; + L[j] -= 4; + } + if (++m == QK_K/8) { + m = 0; hm <<= 1; + } + } + for (int j = 0; j < QK_K; j += 128) { + for (int l = 0; l < 32; ++l) { + y[i].qs[j/4 + l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6); + } + } + + x += QK_K; + } +} + +void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k) { + assert(k % QK_K == 0); + assert(QK_K == 256); + const int nb = k / QK_K; + + const uint32_t kmask1 = 0x03030303; + const uint32_t kmask2 = 0x0f0f0f0f; + + uint32_t aux[4]; + const int8_t * scales = (const int8_t*)aux; + + for (int i = 0; i < nb; i++) { + + const float d_all = ggml_fp16_to_fp32(x[i].d); + + const uint8_t * restrict q = x[i].qs; + const uint8_t * restrict hm = x[i].hmask; + uint8_t m = 1; + + memcpy(aux, x[i].scales, 12); + uint32_t tmp = aux[2]; + aux[2] = ((aux[0] >> 4) & kmask2) | (((tmp >> 4) & kmask1) << 4); + aux[3] = ((aux[1] >> 4) & kmask2) | (((tmp >> 6) & kmask1) << 4); + aux[0] = (aux[0] & kmask2) | (((tmp >> 0) & kmask1) << 4); + aux[1] = (aux[1] & kmask2) | (((tmp >> 2) & kmask1) << 4); + + int is = 0; + float dl; + for (int n = 0; n < QK_K; n += 128) { + int shift = 0; + for (int j = 0; j < 4; ++j) { + + dl = d_all * (scales[is++] - 32); + for (int l = 0; l < 16; ++l) { + *y++ = dl * ((int8_t)((q[l+ 0] >> shift) & 3) - ((hm[l+ 0] & m) ? 0 : 4)); + } + + dl = d_all * (scales[is++] - 32); + for (int l = 0; l < 16; ++l) { + *y++ = dl * ((int8_t)((q[l+16] >> shift) & 3) - ((hm[l+16] & m) ? 0 : 4)); + } + + shift += 2; + m <<= 1; + } + q += 32; + } + + } +} + +void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) { + quantize_row_q3_K_reference(x, vy, k); +} + +size_t ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { + const int nb = k / QK_K; + + // TODO - collect histograms - although, at a second thought, I don't really care about them + (void)hist; + + for (int j = 0; j < nb; j += k) { + block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K; + quantize_row_q3_K_reference(src + j, y, k); + } + return (n/QK_K*sizeof(block_q3_K)); +} + +// ====================== 4-bit (de)-quantization + +void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + uint8_t L[QK_K]; + float mins[QK_K/32]; + float scales[QK_K/32]; + + for (int i = 0; i < nb; i++) { + + float max_scale = 0; // as we are deducting the min, scales are always positive + float max_min = 0; + for (int j = 0; j < QK_K/32; ++j) { + scales[j] = make_qkx1_quants(32, 15, x + 32*j, L + 32*j, &mins[j], 5); + float scale = scales[j]; + if (scale > max_scale) { + max_scale = scale; + } + float min = mins[j]; + if (min > max_min) { + max_min = min; + } + } + + float inv_scale = max_scale > 0 ? 63.f/max_scale : 0.f; + float inv_min = max_min > 0 ? 63.f/max_min : 0.f; + for (int j = 0; j < QK_K/32; ++j) { + uint8_t ls = nearest_int(inv_scale*scales[j]); + uint8_t lm = nearest_int(inv_min*mins[j]); + ls = MIN(63, ls); + lm = MIN(63, lm); + if (j < 4) { + y[i].scales[j] = ls; + y[i].scales[j+4] = lm; + } else { + y[i].scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4); + y[i].scales[j-4] |= ((ls >> 4) << 6); + y[i].scales[j-0] |= ((lm >> 4) << 6); + } + } + y[i].d = ggml_fp32_to_fp16(max_scale/63.f); + y[i].dmin = ggml_fp32_to_fp16(max_min/63.f); + + uint8_t sc, m; + for (int j = 0; j < QK_K/32; ++j) { + get_scale_min_k4(j, y[i].scales, &sc, &m); + const float d = ggml_fp16_to_fp32(y[i].d) * sc; + if (!d) continue; + const float dm = ggml_fp16_to_fp32(y[i].dmin) * m; + for (int ii = 0; ii < 32; ++ii) { + int l = nearest_int((x[32*j + ii] + dm)/d); + l = MAX(0, MIN(15, l)); + L[32*j + ii] = l; + } + } + uint8_t * q = y[i].qs; + for (int j = 0; j < QK_K; j += 64) { + for (int l = 0; l < 32; ++l) *q++ = L[j + l] | (L[j + l + 32] << 4); + } + + x += QK_K; + + } +} + +void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + for (int i = 0; i < nb; i++) { + + const float d = ggml_fp16_to_fp32(x[i].d); + const float min = ggml_fp16_to_fp32(x[i].dmin); + + const uint8_t * q = x[i].qs; + + int is = 0; + uint8_t sc, m; + for (int j = 0; j < QK_K; j += 64) { + get_scale_min_k4(is + 0, x[i].scales, &sc, &m); + const float d1 = d * sc; const float m1 = min * m; + get_scale_min_k4(is + 1, x[i].scales, &sc, &m); + const float d2 = d * sc; const float m2 = min * m; + for (int l = 0; l < 32; ++l) *y++ = d1 * (q[l] & 0xF) - m1; + for (int l = 0; l < 32; ++l) *y++ = d2 * (q[l] >> 4) - m2; + q += 32; is += 2; + } + + } +} + +void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) { + assert(k % QK_K == 0); + block_q4_K * restrict y = vy; + quantize_row_q4_K_reference(x, y, k); +} + +size_t ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + (void)hist; // TODO: collect histograms + for (int j = 0; j < nb; j += k) { + block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K; + quantize_row_q4_K_reference(src + j, y, k); + } + return (n/QK_K*sizeof(block_q4_K)); +} + +// ====================== 5-bit (de)-quantization + +void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + uint8_t L[QK_K]; + float mins[QK_K/32]; + float scales[QK_K/32]; + + for (int i = 0; i < nb; i++) { + + float max_scale = 0; // as we are deducting the min, scales are always positive + float max_min = 0; + for (int j = 0; j < QK_K/32; ++j) { + scales[j] = make_qkx1_quants(32, 31, x + 32*j, L + 32*j, &mins[j], 5); + float scale = scales[j]; + if (scale > max_scale) { + max_scale = scale; + } + float min = mins[j]; + if (min > max_min) { + max_min = min; + } + } + + float inv_scale = max_scale > 0 ? 63.f/max_scale : 0.f; + float inv_min = max_min > 0 ? 63.f/max_min : 0.f; + for (int j = 0; j < QK_K/32; ++j) { + uint8_t ls = nearest_int(inv_scale*scales[j]); + uint8_t lm = nearest_int(inv_min*mins[j]); + ls = MIN(63, ls); + lm = MIN(63, lm); + if (j < 4) { + y[i].scales[j] = ls; + y[i].scales[j+4] = lm; + } else { + y[i].scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4); + y[i].scales[j-4] |= ((ls >> 4) << 6); + y[i].scales[j-0] |= ((lm >> 4) << 6); + } + } + y[i].d = ggml_fp32_to_fp16(max_scale/63.f); + y[i].dmin = ggml_fp32_to_fp16(max_min/63.f); + + uint8_t sc, m; + for (int j = 0; j < QK_K/32; ++j) { + get_scale_min_k4(j, y[i].scales, &sc, &m); + const float d = ggml_fp16_to_fp32(y[i].d) * sc; + if (!d) continue; + const float dm = ggml_fp16_to_fp32(y[i].dmin) * m; + for (int ii = 0; ii < 32; ++ii) { + int l = nearest_int((x[32*j + ii] + dm)/d); + l = MAX(0, MIN(31, l)); + L[32*j + ii] = l; + } + } + + uint8_t * restrict qh = y[i].qh; + uint8_t * restrict ql = y[i].qs; + memset(qh, 0, QK_K/8); + + uint8_t m1 = 1, m2 = 2; + for (int n = 0; n < QK_K; n += 64) { + for (int j = 0; j < 32; ++j) { + int l1 = L[n + j]; + if (l1 > 15) { + l1 -= 16; qh[j] |= m1; + } + int l2 = L[n + j + 32]; + if (l2 > 15) { + l2 -= 16; qh[j] |= m2; + } + ql[j] = l1 | (l2 << 4); + } + m1 <<= 2; m2 <<= 2; + ql += 32; + } + + x += QK_K; + + } +} + +void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + for (int i = 0; i < nb; i++) { + + const float d = ggml_fp16_to_fp32(x[i].d); + const float min = ggml_fp16_to_fp32(x[i].dmin); + + const uint8_t * ql = x[i].qs; + const uint8_t * qh = x[i].qh; + + int is = 0; + uint8_t sc, m; + uint8_t u1 = 1, u2 = 2; + for (int j = 0; j < QK_K; j += 64) { + get_scale_min_k4(is + 0, x[i].scales, &sc, &m); + const float d1 = d * sc; const float m1 = min * m; + get_scale_min_k4(is + 1, x[i].scales, &sc, &m); + const float d2 = d * sc; const float m2 = min * m; + for (int l = 0; l < 32; ++l) *y++ = d1 * ((ql[l] & 0xF) + (qh[l] & u1 ? 16 : 0)) - m1; + for (int l = 0; l < 32; ++l) *y++ = d2 * ((ql[l] >> 4) + (qh[l] & u2 ? 16 : 0)) - m2; + ql += 32; is += 2; + u1 <<= 2; u2 <<= 2; + } + } +} + +void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) { + assert(k % QK_K == 0); + block_q5_K * restrict y = vy; + quantize_row_q5_K_reference(x, y, k); +} + +size_t ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + (void)hist; + for (int j = 0; j < nb; j += k) { + block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K; + quantize_row_q5_K_reference(src + j, y, k); + } + return (n/QK_K*sizeof(block_q5_K)); +} + +// ====================== 6-bit (de)-quantization + +void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + int8_t L[QK_K]; + float scales[QK_K/16]; + + for (int i = 0; i < nb; i++) { + + float max_scale = 0; + float max_abs_scale = 0; + + for (int ib = 0; ib < QK_K/16; ++ib) { + + const float scale = make_qx_quants(16, 32, x + 16*ib, L + 16*ib, 1); + scales[ib] = scale; + + const float abs_scale = fabsf(scale); + if (abs_scale > max_abs_scale) { + max_abs_scale = abs_scale; + max_scale = scale; + } + + } + + float iscale = -128.f/max_scale; + y[i].d = ggml_fp32_to_fp16(1/iscale); + for (int ib = 0; ib < QK_K/16; ++ib) { + y[i].scales[ib] = MIN(127, nearest_int(iscale*scales[ib])); + } + + for (int j = 0; j < QK_K/16; ++j) { + float d = ggml_fp16_to_fp32(y[i].d) * y[i].scales[j]; + if (!d) { + continue; + } + for (int ii = 0; ii < 16; ++ii) { + int l = nearest_int(x[16*j + ii]/d); + l = MAX(-32, MIN(31, l)); + L[16*j + ii] = l + 32; + } + } + + uint8_t * restrict ql = y[i].ql; + uint8_t * restrict qh = y[i].qh; + for (int j = 0; j < QK_K; j += 128) { + for (int l = 0; l < 32; ++l) { + const uint8_t q1 = L[j + l + 0] & 0xF; + const uint8_t q2 = L[j + l + 32] & 0xF; + const uint8_t q3 = L[j + l + 64] & 0xF; + const uint8_t q4 = L[j + l + 96] & 0xF; + ql[l+ 0] = q1 | (q3 << 4); + ql[l+32] = q2 | (q4 << 4); + qh[l] = (L[j + l] >> 4) | ((L[j + l + 32] >> 4) << 2) | ((L[j + l + 64] >> 4) << 4) | ((L[j + l + 96] >> 4) << 6); + } + ql += 64; + qh += 32; + } + + x += QK_K; + + } +} + +void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + for (int i = 0; i < nb; i++) { + + const float d = ggml_fp16_to_fp32(x[i].d); + + const uint8_t * restrict ql = x[i].ql; + const uint8_t * restrict qh = x[i].qh; + const int8_t * restrict sc = x[i].scales; + + for (int n = 0; n < QK_K; n += 128) { + for (int l = 0; l < 32; ++l) { + int is = l/16; + const int8_t q1 = (int8_t)((ql[l + 0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32; + const int8_t q2 = (int8_t)((ql[l + 32] & 0xF) | (((qh[l] >> 2) & 3) << 4)) - 32; + const int8_t q3 = (int8_t)((ql[l + 0] >> 4) | (((qh[l] >> 4) & 3) << 4)) - 32; + const int8_t q4 = (int8_t)((ql[l + 32] >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32; + y[l + 0] = d * sc[is + 0] * q1; + y[l + 32] = d * sc[is + 2] * q2; + y[l + 64] = d * sc[is + 4] * q3; + y[l + 96] = d * sc[is + 6] * q4; + } + y += 128; + ql += 64; + qh += 32; + sc += 8; + } + + } +} + +void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) { + assert(k % QK_K == 0); + block_q6_K * restrict y = vy; + quantize_row_q6_K_reference(x, y, k); +} + +size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + (void)hist; // TODO + + for (int j = 0; j < nb; j += k) { + block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K; + quantize_row_q6_K_reference(src + j, y, k); + } + return (n/QK_K*sizeof(block_q6_K)); +} + +//===================================== Q8_K ============================================== + +void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + for (int i = 0; i < nb; i++) { + + float max = 0; + float amax = 0; + for (int j = 0; j < QK_K; ++j) { + float ax = fabsf(x[j]); + if (ax > amax) { + amax = ax; max = x[j]; + } + } + if (!amax) { + y[i].d = 0; + memset(y[i].qs, 0, QK_K); + x += QK_K; + continue; + } + const float iscale = -128.f/max; + for (int j = 0; j < QK_K; ++j) { + int v = nearest_int(iscale*x[j]); + y[i].qs[j] = MIN(127, v); + } + for (int j = 0; j < QK_K/16; ++j) { + int sum = 0; + for (int ii = 0; ii < 16; ++ii) { + sum += y[i].qs[j*16 + ii]; + } + y[i].bsums[j] = sum; + } + y[i].d = 1/iscale; + x += QK_K; + } +} + +void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k) { + assert(k % QK_K == 0); + const int nb = k / QK_K; + + for (int i = 0; i < nb; i++) { + for (int j = 0; j < QK_K; ++j) { + *y++ = x[i].d * x[i].qs[j]; + } + } +} + +void quantize_row_q8_K(const float * restrict x, void * restrict y, int k) { + quantize_row_q8_K_reference(x, y, k); +} + +//===================================== Dot ptoducts ================================= + +// +// Helper functions +// +#if __AVX__ || __AVX2__ || __AVX512F__ + +// horizontally add 8 floats +static inline float hsum_float_8(const __m256 x) { + __m128 res = _mm256_extractf128_ps(x, 1); + res = _mm_add_ps(res, _mm256_castps256_ps128(x)); + res = _mm_add_ps(res, _mm_movehl_ps(res, res)); + res = _mm_add_ss(res, _mm_movehdup_ps(res)); + return _mm_cvtss_f32(res); +} + +// shuffles to pick the required scales in dot products +static inline __m256i get_scale_shuffle_q3k(int i) { + static const uint8_t k_shuffle[128] = { + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, + 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11, + 12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13, 14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15, + }; + return _mm256_loadu_si256((const __m256i*)k_shuffle + i); +} +static inline __m256i get_scale_shuffle_k4(int i) { + static const uint8_t k_shuffle[256] = { + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, + 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, + 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, + 10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11, + 12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13,12,13, + 14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15 + }; + return _mm256_loadu_si256((const __m256i*)k_shuffle + i); +} +static inline __m128i get_scale_shuffle(int i) { + static const uint8_t k_shuffle[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10,10,10,10,10,10,10,10, 11,11,11,11,11,11,11,11, + 12,12,12,12,12,12,12,12, 13,13,13,13,13,13,13,13, + 14,14,14,14,14,14,14,14, 15,15,15,15,15,15,15,15 + }; + return _mm_loadu_si128((const __m128i*)k_shuffle + i); +} +#endif + +void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + + const block_q2_K * restrict x = vx; + const block_q8_K * restrict y = vy; + + const int nb = n / QK_K; + +#ifdef __ARM_NEON + + const uint8x16_t m3 = vdupq_n_u8(0x3); + const uint8x16_t m4 = vdupq_n_u8(0xF); + const int32x4_t vzero = vdupq_n_s32(0); + + int8x16x2_t q2bytes; + uint8_t aux[16]; + + float sum = 0; + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin); + + const uint8_t * restrict q2 = x[i].qs; + const int8_t * restrict q8 = y[i].qs; + const uint8_t * restrict sc = x[i].scales; + + const uint8x16_t mins_and_scales = vld1q_u8(sc); + const uint8x16_t scales = vandq_u8(mins_and_scales, m4); + vst1q_u8(aux, scales); + + const uint8x16_t mins = vshrq_n_u8(mins_and_scales, 4); + const int16x8x2_t q8sums = vld1q_s16_x2(y[i].bsums); + const int16x8x2_t mins16 = {vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mins))), vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mins)))}; + const int32x4_t s0 = vaddq_s32(vmull_s16(vget_low_s16 (mins16.val[0]), vget_low_s16 (q8sums.val[0])), + vmull_s16(vget_high_s16(mins16.val[0]), vget_high_s16(q8sums.val[0]))); + const int32x4_t s1 = vaddq_s32(vmull_s16(vget_low_s16 (mins16.val[1]), vget_low_s16 (q8sums.val[1])), + vmull_s16(vget_high_s16(mins16.val[1]), vget_high_s16(q8sums.val[1]))); + sum += dmin * vaddvq_s32(vaddq_s32(s0, s1)); + + int isum = 0; + int is = 0; + +// We use this macro instead of a function call because for some reason +// the code runs 2-3% slower, even if the function is declared inline +#if defined(__ARM_FEATURE_DOTPROD) +#define MULTIPLY_ACCUM_WITH_SCALE(index)\ + isum += vaddvq_s32(vdotq_s32(vzero, q2bytes.val[0], q8bytes.val[0])) * aux[is+(index)];\ + isum += vaddvq_s32(vdotq_s32(vzero, q2bytes.val[1], q8bytes.val[1])) * aux[is+1+(index)]; +#else +#define MULTIPLY_ACCUM_WITH_SCALE(index)\ + {\ + const int16x8_t p1 = vaddq_s16(vmull_s8(vget_low_s8 (q2bytes.val[0]), vget_low_s8 (q8bytes.val[0])),\ + vmull_s8(vget_high_s8(q2bytes.val[0]), vget_high_s8(q8bytes.val[0])));\ + const int16x8_t p2 = vaddq_s16(vmull_s8(vget_low_s8 (q2bytes.val[1]), vget_low_s8 (q8bytes.val[1])),\ + vmull_s8(vget_high_s8(q2bytes.val[1]), vget_high_s8(q8bytes.val[1])));\ + isum += vaddvq_s16(p1) * aux[is+(index)] + vaddvq_s16(p2) * aux[is+1+(index)];\ + } +#endif + +#define SHIFT_MULTIPLY_ACCUM_WITH_SCALE(shift, index)\ + q8bytes = vld1q_s8_x2(q8); q8 += 32;\ + q2bytes.val[0] = vreinterpretq_s8_u8(vandq_u8(vshrq_n_u8(q2bits.val[0], (shift)), m3));\ + q2bytes.val[1] = vreinterpretq_s8_u8(vandq_u8(vshrq_n_u8(q2bits.val[1], (shift)), m3));\ + MULTIPLY_ACCUM_WITH_SCALE((index)); + + + for (int j = 0; j < QK_K/128; ++j) { + + const uint8x16x2_t q2bits = vld1q_u8_x2(q2); q2 += 32; + + int8x16x2_t q8bytes = vld1q_s8_x2(q8); q8 += 32; + q2bytes.val[0] = vreinterpretq_s8_u8(vandq_u8(q2bits.val[0], m3)); + q2bytes.val[1] = vreinterpretq_s8_u8(vandq_u8(q2bits.val[1], m3)); + MULTIPLY_ACCUM_WITH_SCALE(0); + + SHIFT_MULTIPLY_ACCUM_WITH_SCALE(2, 2); + + SHIFT_MULTIPLY_ACCUM_WITH_SCALE(4, 4); + + SHIFT_MULTIPLY_ACCUM_WITH_SCALE(6, 6); + + is += 8; + } + sum += d * isum; + + } + + *s = sum; + +#elif defined __AVX2__ + + const __m256i m3 = _mm256_set1_epi8(3); + const __m128i m4 = _mm_set1_epi8(0xF); + + __m256 acc = _mm256_setzero_ps(); + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin); + + const uint8_t * restrict q2 = x[i].qs; + const int8_t * restrict q8 = y[i].qs; + + const __m128i mins_and_scales = _mm_loadu_si128((const __m128i*)x[i].scales); + const __m128i scales8 = _mm_and_si128(mins_and_scales, m4); + const __m128i mins8 = _mm_and_si128(_mm_srli_epi16(mins_and_scales, 4), m4); + const __m256i mins = _mm256_cvtepi8_epi16(mins8); + const __m256i prod = _mm256_madd_epi16(mins, _mm256_loadu_si256((const __m256i*)y[i].bsums)); + + acc = _mm256_fmadd_ps(_mm256_broadcast_ss(&dmin), _mm256_cvtepi32_ps(prod), acc); + + const __m256i all_scales = _mm256_cvtepi8_epi16(scales8); + const __m128i l_scales = _mm256_extracti128_si256(all_scales, 0); + const __m128i h_scales = _mm256_extracti128_si256(all_scales, 1); + const __m256i scales[2] = {_mm256_set_m128i(l_scales, l_scales), _mm256_set_m128i(h_scales, h_scales)}; + + __m256i sumi = _mm256_setzero_si256(); + + for (int j = 0; j < QK_K/128; ++j) { + + const __m256i q2bits = _mm256_loadu_si256((const __m256i*)q2); q2 += 32; + + const __m256i q8_0 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_1 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_2 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_3 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + + const __m256i q2_0 = _mm256_and_si256(q2bits, m3); + const __m256i q2_1 = _mm256_and_si256(_mm256_srli_epi16(q2bits, 2), m3); + const __m256i q2_2 = _mm256_and_si256(_mm256_srli_epi16(q2bits, 4), m3); + const __m256i q2_3 = _mm256_and_si256(_mm256_srli_epi16(q2bits, 6), m3); + + __m256i p0 = _mm256_maddubs_epi16(q2_0, q8_0); + __m256i p1 = _mm256_maddubs_epi16(q2_1, q8_1); + __m256i p2 = _mm256_maddubs_epi16(q2_2, q8_2); + __m256i p3 = _mm256_maddubs_epi16(q2_3, q8_3); + + p0 = _mm256_madd_epi16(_mm256_shuffle_epi8(scales[j], get_scale_shuffle_q3k(0)), p0); + p1 = _mm256_madd_epi16(_mm256_shuffle_epi8(scales[j], get_scale_shuffle_q3k(1)), p1); + p2 = _mm256_madd_epi16(_mm256_shuffle_epi8(scales[j], get_scale_shuffle_q3k(2)), p2); + p3 = _mm256_madd_epi16(_mm256_shuffle_epi8(scales[j], get_scale_shuffle_q3k(3)), p3); + + p0 = _mm256_add_epi32(p0, p1); + p2 = _mm256_add_epi32(p2, p3); + + sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p0, p2)); + } + + acc = _mm256_fmadd_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(sumi), acc); + + } + + *s = hsum_float_8(acc); + +#else + + float sumf = 0; + + for (int i = 0; i < nb; ++i) { + + const uint8_t * q2 = x[i].qs; + const int8_t * q8 = y[i].qs; + const uint8_t * sc = x[i].scales; + + int summs = 0; + for (int j = 0; j < 16; ++j) { + summs += y[i].bsums[j] * (sc[j] >> 4); + } + + const float dall = y[i].d * ggml_fp16_to_fp32(x[i].d); + const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin); + + int isum = 0; + int is = 0; + int d; + for (int k = 0; k < QK_K/128; ++k) { + int shift = 0; + for (int j = 0; j < 4; ++j) { + d = sc[is++] & 0xF; + int isuml = 0; + for (int l = 0; l < 16; ++l) isuml += q8[l] * ((q2[l] >> shift) & 3); + isum += d * isuml; + d = sc[is++] & 0xF; + isuml = 0; + for (int l = 16; l < 32; ++l) isuml += q8[l] * ((q2[l] >> shift) & 3); + isum += d * isuml; + shift += 2; + q8 += 32; + } + q2 += 32; + } + sumf += dall * isum - dmin * summs; + } + *s = sumf; +#endif +} + +void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + assert(n % QK_K == 0); + + const uint32_t kmask1 = 0x03030303; + const uint32_t kmask2 = 0x0f0f0f0f; + + const block_q3_K * restrict x = vx; + const block_q8_K * restrict y = vy; + + const int nb = n / QK_K; + +#ifdef __ARM_NEON + + uint32_t aux[3]; + uint32_t utmp[4]; + + const uint8x16_t m3b = vdupq_n_u8(0x3); +#ifdef __ARM_FEATURE_DOTPROD + const int32x4_t vzero = vdupq_n_s32(0); +#endif + + const uint8x16_t m0 = vdupq_n_u8(1); + const uint8x16_t m1 = vshlq_n_u8(m0, 1); + const uint8x16_t m2 = vshlq_n_u8(m0, 2); + const uint8x16_t m3 = vshlq_n_u8(m0, 3); + const int8_t m32 = 32; + + int8x16x4_t q3bytes; + + float sum = 0; + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + + const uint8_t * restrict q3 = x[i].qs; + const uint8_t * restrict qh = x[i].hmask; + const int8_t * restrict q8 = y[i].qs; + + uint8x16x2_t qhbits = vld1q_u8_x2(qh); + + uint8x16x4_t q3h; + + int32_t isum = 0; + + // Set up scales + memcpy(aux, x[i].scales, 12); + utmp[3] = ((aux[1] >> 4) & kmask2) | (((aux[2] >> 6) & kmask1) << 4); + utmp[2] = ((aux[0] >> 4) & kmask2) | (((aux[2] >> 4) & kmask1) << 4); + utmp[1] = (aux[1] & kmask2) | (((aux[2] >> 2) & kmask1) << 4); + utmp[0] = (aux[0] & kmask2) | (((aux[2] >> 0) & kmask1) << 4); + + int8_t * scale = (int8_t *)utmp; + for (int j = 0; j < 16; ++j) scale[j] -= m32; + + for (int j = 0; j < QK_K/128; ++j) { + + const uint8x16x2_t q3bits = vld1q_u8_x2(q3); q3 += 32; + const int8x16x4_t q8bytes_1 = vld1q_s8_x4(q8); q8 += 64; + const int8x16x4_t q8bytes_2 = vld1q_s8_x4(q8); q8 += 64; + + q3h.val[0] = vshlq_n_u8(vbicq_u8(m0, qhbits.val[0]), 2); + q3h.val[1] = vshlq_n_u8(vbicq_u8(m0, qhbits.val[1]), 2); + q3h.val[2] = vshlq_n_u8(vbicq_u8(m1, qhbits.val[0]), 1); + q3h.val[3] = vshlq_n_u8(vbicq_u8(m1, qhbits.val[1]), 1); + + q3bytes.val[0] = vsubq_s8(vreinterpretq_s8_u8(vandq_u8(q3bits.val[0], m3b)), vreinterpretq_s8_u8(q3h.val[0])); + q3bytes.val[1] = vsubq_s8(vreinterpretq_s8_u8(vandq_u8(q3bits.val[1], m3b)), vreinterpretq_s8_u8(q3h.val[1])); + q3bytes.val[2] = vsubq_s8(vreinterpretq_s8_u8(vandq_u8(vshrq_n_u8(q3bits.val[0], 2), m3b)), vreinterpretq_s8_u8(q3h.val[2])); + q3bytes.val[3] = vsubq_s8(vreinterpretq_s8_u8(vandq_u8(vshrq_n_u8(q3bits.val[1], 2), m3b)), vreinterpretq_s8_u8(q3h.val[3])); + +#if defined(__ARM_FEATURE_DOTPROD) + isum += vaddvq_s32(vdotq_s32(vzero, q3bytes.val[0], q8bytes_1.val[0])) * scale[0]; + isum += vaddvq_s32(vdotq_s32(vzero, q3bytes.val[1], q8bytes_1.val[1])) * scale[1]; + isum += vaddvq_s32(vdotq_s32(vzero, q3bytes.val[2], q8bytes_1.val[2])) * scale[2]; + isum += vaddvq_s32(vdotq_s32(vzero, q3bytes.val[3], q8bytes_1.val[3])) * scale[3]; +#else + int16x8_t p0 = vaddq_s16(vmull_s8(vget_low_s8 (q3bytes.val[0]), vget_low_s8 (q8bytes_1.val[0])), + vmull_s8(vget_high_s8(q3bytes.val[0]), vget_high_s8(q8bytes_1.val[0]))); + int16x8_t p1 = vaddq_s16(vmull_s8(vget_low_s8 (q3bytes.val[1]), vget_low_s8 (q8bytes_1.val[1])), + vmull_s8(vget_high_s8(q3bytes.val[1]), vget_high_s8(q8bytes_1.val[1]))); + int16x8_t p2 = vaddq_s16(vmull_s8(vget_low_s8 (q3bytes.val[2]), vget_low_s8 (q8bytes_1.val[2])), + vmull_s8(vget_high_s8(q3bytes.val[2]), vget_high_s8(q8bytes_1.val[2]))); + int16x8_t p3 = vaddq_s16(vmull_s8(vget_low_s8 (q3bytes.val[3]), vget_low_s8 (q8bytes_1.val[3])), + vmull_s8(vget_high_s8(q3bytes.val[3]), vget_high_s8(q8bytes_1.val[3]))); + isum += vaddvq_s16(p0) * scale[0] + vaddvq_s16(p1) * scale[1] + vaddvq_s16(p2) * scale[2] + vaddvq_s16(p3) * scale[3]; +#endif + scale += 4; + + q3h.val[0] = vbicq_u8(m2, qhbits.val[0]); + q3h.val[1] = vbicq_u8(m2, qhbits.val[1]); + q3h.val[2] = vshrq_n_u8(vbicq_u8(m3, qhbits.val[0]), 1); + q3h.val[3] = vshrq_n_u8(vbicq_u8(m3, qhbits.val[1]), 1); + + q3bytes.val[0] = vsubq_s8(vreinterpretq_s8_u8(vandq_u8(vshrq_n_u8(q3bits.val[0], 4), m3b)), vreinterpretq_s8_u8(q3h.val[0])); + q3bytes.val[1] = vsubq_s8(vreinterpretq_s8_u8(vandq_u8(vshrq_n_u8(q3bits.val[1], 4), m3b)), vreinterpretq_s8_u8(q3h.val[1])); + q3bytes.val[2] = vsubq_s8(vreinterpretq_s8_u8(vandq_u8(vshrq_n_u8(q3bits.val[0], 6), m3b)), vreinterpretq_s8_u8(q3h.val[2])); + q3bytes.val[3] = vsubq_s8(vreinterpretq_s8_u8(vandq_u8(vshrq_n_u8(q3bits.val[1], 6), m3b)), vreinterpretq_s8_u8(q3h.val[3])); + +#if defined(__ARM_FEATURE_DOTPROD) + isum += vaddvq_s32(vdotq_s32(vzero, q3bytes.val[0], q8bytes_2.val[0])) * scale[0]; + isum += vaddvq_s32(vdotq_s32(vzero, q3bytes.val[1], q8bytes_2.val[1])) * scale[1]; + isum += vaddvq_s32(vdotq_s32(vzero, q3bytes.val[2], q8bytes_2.val[2])) * scale[2]; + isum += vaddvq_s32(vdotq_s32(vzero, q3bytes.val[3], q8bytes_2.val[3])) * scale[3]; +#else + p0 = vaddq_s16(vmull_s8(vget_low_s8 (q3bytes.val[0]), vget_low_s8 (q8bytes_2.val[0])), + vmull_s8(vget_high_s8(q3bytes.val[0]), vget_high_s8(q8bytes_2.val[0]))); + p1 = vaddq_s16(vmull_s8(vget_low_s8 (q3bytes.val[1]), vget_low_s8 (q8bytes_2.val[1])), + vmull_s8(vget_high_s8(q3bytes.val[1]), vget_high_s8(q8bytes_2.val[1]))); + p2 = vaddq_s16(vmull_s8(vget_low_s8 (q3bytes.val[2]), vget_low_s8 (q8bytes_2.val[2])), + vmull_s8(vget_high_s8(q3bytes.val[2]), vget_high_s8(q8bytes_2.val[2]))); + p3 = vaddq_s16(vmull_s8(vget_low_s8 (q3bytes.val[3]), vget_low_s8 (q8bytes_2.val[3])), + vmull_s8(vget_high_s8(q3bytes.val[3]), vget_high_s8(q8bytes_2.val[3]))); + isum += vaddvq_s16(p0) * scale[0] + vaddvq_s16(p1) * scale[1] + vaddvq_s16(p2) * scale[2] + vaddvq_s16(p3) * scale[3]; +#endif + scale += 4; + + if (j == 0) { + qhbits.val[0] = vshrq_n_u8(qhbits.val[0], 4); + qhbits.val[1] = vshrq_n_u8(qhbits.val[1], 4); + } + + } + sum += d * isum; + + } + + *s = sum; + +#elif defined __AVX2__ + + const __m256i m3 = _mm256_set1_epi8(3); + const __m256i mone = _mm256_set1_epi8(1); + const __m128i m32 = _mm_set1_epi8(32); + + __m256 acc = _mm256_setzero_ps(); + + uint32_t aux[3]; + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + + const uint8_t * restrict q3 = x[i].qs; + const int8_t * restrict q8 = y[i].qs; + + // Set up scales + memcpy(aux, x[i].scales, 12); + __m128i scales128 = _mm_set_epi32( + ((aux[1] >> 4) & kmask2) | (((aux[2] >> 6) & kmask1) << 4), + ((aux[0] >> 4) & kmask2) | (((aux[2] >> 4) & kmask1) << 4), + (aux[1] & kmask2) | (((aux[2] >> 2) & kmask1) << 4), + (aux[0] & kmask2) | (((aux[2] >> 0) & kmask1) << 4)); + scales128 = _mm_sub_epi8(scales128, m32); + const __m256i all_scales = _mm256_cvtepi8_epi16(scales128); + const __m128i l_scales = _mm256_extracti128_si256(all_scales, 0); + const __m128i h_scales = _mm256_extracti128_si256(all_scales, 1); + const __m256i scales[2] = {_mm256_set_m128i(l_scales, l_scales), _mm256_set_m128i(h_scales, h_scales)}; + + // high bit + const __m256i hbits = _mm256_loadu_si256((const __m256i*)x[i].hmask); + + // integer accumulator + __m256i sumi = _mm256_setzero_si256(); + + int bit = 0; + int is = 0; + + for (int j = 0; j < QK_K/128; ++j) { + // load low 2 bits + const __m256i q3bits = _mm256_loadu_si256((const __m256i*)q3); q3 += 32; + + // prepare low and high bits + const __m256i q3l_0 = _mm256_and_si256(q3bits, m3); + const __m256i q3h_0 = _mm256_slli_epi16(_mm256_srli_epi16(_mm256_andnot_si256(hbits, _mm256_slli_epi16(mone, bit)), bit), 2); + ++bit; + + const __m256i q3l_1 = _mm256_and_si256(_mm256_srli_epi16(q3bits, 2), m3); + const __m256i q3h_1 = _mm256_slli_epi16(_mm256_srli_epi16(_mm256_andnot_si256(hbits, _mm256_slli_epi16(mone, bit)), bit), 2); + ++bit; + + const __m256i q3l_2 = _mm256_and_si256(_mm256_srli_epi16(q3bits, 4), m3); + const __m256i q3h_2 = _mm256_slli_epi16(_mm256_srli_epi16(_mm256_andnot_si256(hbits, _mm256_slli_epi16(mone, bit)), bit), 2); + ++bit; + + const __m256i q3l_3 = _mm256_and_si256(_mm256_srli_epi16(q3bits, 6), m3); + const __m256i q3h_3 = _mm256_slli_epi16(_mm256_srli_epi16(_mm256_andnot_si256(hbits, _mm256_slli_epi16(mone, bit)), bit), 2); + ++bit; + + // load Q8 quants + const __m256i q8_0 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_1 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_2 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_3 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + + // Dot product: we multiply the 2 low bits and 1 high bit part separately, so we can use _mm256_maddubs_epi16, + // and then subtract. The high bit part has the 2 already subtracted (and so, it is zero if the high bit was not set, + // and 2 if the high bit was set) + __m256i q8s_0 = _mm256_maddubs_epi16(q3h_0, q8_0); + __m256i q8s_1 = _mm256_maddubs_epi16(q3h_1, q8_1); + __m256i q8s_2 = _mm256_maddubs_epi16(q3h_2, q8_2); + __m256i q8s_3 = _mm256_maddubs_epi16(q3h_3, q8_3); + + __m256i p16_0 = _mm256_maddubs_epi16(q3l_0, q8_0); + __m256i p16_1 = _mm256_maddubs_epi16(q3l_1, q8_1); + __m256i p16_2 = _mm256_maddubs_epi16(q3l_2, q8_2); + __m256i p16_3 = _mm256_maddubs_epi16(q3l_3, q8_3); + + p16_0 = _mm256_sub_epi16(p16_0, q8s_0); + p16_1 = _mm256_sub_epi16(p16_1, q8s_1); + p16_2 = _mm256_sub_epi16(p16_2, q8s_2); + p16_3 = _mm256_sub_epi16(p16_3, q8s_3); + + // multiply with scales + p16_0 = _mm256_madd_epi16(_mm256_shuffle_epi8(scales[j], get_scale_shuffle_q3k(is + 0)), p16_0); + p16_1 = _mm256_madd_epi16(_mm256_shuffle_epi8(scales[j], get_scale_shuffle_q3k(is + 1)), p16_1); + p16_2 = _mm256_madd_epi16(_mm256_shuffle_epi8(scales[j], get_scale_shuffle_q3k(is + 2)), p16_2); + p16_3 = _mm256_madd_epi16(_mm256_shuffle_epi8(scales[j], get_scale_shuffle_q3k(is + 3)), p16_3); + + // accumulate + p16_0 = _mm256_add_epi32(p16_0, p16_1); + p16_2 = _mm256_add_epi32(p16_2, p16_3); + sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p16_0, p16_2)); + + } + + // multiply with block scale and accumulate + acc = _mm256_fmadd_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(sumi), acc); + + } + + *s = hsum_float_8(acc); + +#else + // scalar version + // This function is written like this so the compiler can manage to vectorize most of it + // Using -Ofast, GCC and clang manage to produce code that is within a factor of 2 or so from the + // manually vectorized version above. Every other version I tried would run at least 4 times slower. + // The ideal situation would be if we could just write the code once, and the compiler would + // automatically produce the best possible set of machine instructions, instead of us having to manually + // write vectorized versions for AVX, ARM_NEON, etc. + + int8_t aux8[QK_K]; + int16_t aux16[8]; + float sums [8]; + int32_t aux32[8]; + memset(sums, 0, 8*sizeof(float)); + + uint32_t auxs[4]; + const int8_t * scales = (const int8_t*)auxs; + + float sumf = 0; + for (int i = 0; i < nb; ++i) { + const uint8_t * restrict q3 = x[i].qs; + const uint8_t * restrict hm = x[i].hmask; + const int8_t * restrict q8 = y[i].qs; + memset(aux32, 0, 8*sizeof(int32_t)); + int8_t * restrict a = aux8; + uint8_t m = 1; + for (int j = 0; j < QK_K; j += 128) { + for (int l = 0; l < 32; ++l) a[l] = q3[l] & 3; + for (int l = 0; l < 32; ++l) a[l] -= (hm[l] & m ? 0 : 4); + a += 32; m <<= 1; + for (int l = 0; l < 32; ++l) a[l] = (q3[l] >> 2) & 3; + for (int l = 0; l < 32; ++l) a[l] -= (hm[l] & m ? 0 : 4); + a += 32; m <<= 1; + for (int l = 0; l < 32; ++l) a[l] = (q3[l] >> 4) & 3; + for (int l = 0; l < 32; ++l) a[l] -= (hm[l] & m ? 0 : 4); + a += 32; m <<= 1; + for (int l = 0; l < 32; ++l) a[l] = (q3[l] >> 6) & 3; + for (int l = 0; l < 32; ++l) a[l] -= (hm[l] & m ? 0 : 4); + a += 32; m <<= 1; + q3 += 32; + } + a = aux8; + + memcpy(auxs, x[i].scales, 12); + uint32_t tmp = auxs[2]; + auxs[2] = ((auxs[0] >> 4) & kmask2) | (((tmp >> 4) & kmask1) << 4); + auxs[3] = ((auxs[1] >> 4) & kmask2) | (((tmp >> 6) & kmask1) << 4); + auxs[0] = (auxs[0] & kmask2) | (((tmp >> 0) & kmask1) << 4); + auxs[1] = (auxs[1] & kmask2) | (((tmp >> 2) & kmask1) << 4); + for (int j = 0; j < QK_K/16; ++j) { + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; + q8 += 8; a += 8; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; + q8 += 8; a += 8; + } + const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d; + for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; + } + for (int l = 0; l < 8; ++l) sumf += sums[l]; + *s = sumf; + +#endif + +} + +void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + assert(n % QK_K == 0); + + const block_q4_K * restrict x = vx; + const block_q8_K * restrict y = vy; + + const int nb = n / QK_K; + + static const uint32_t kmask1 = 0x3f3f3f3f; + static const uint32_t kmask2 = 0x0f0f0f0f; + static const uint32_t kmask3 = 0x03030303; + + uint32_t utmp[4]; + +#ifdef __ARM_NEON + + const uint8x16_t m4b = vdupq_n_u8(0xf); +#ifdef __ARM_FEATURE_DOTPROD + const int32x4_t mzero = vdupq_n_s32(0); +#endif + + int8x16x2_t q4bytes; + int8x16x2_t q8bytes; + + float sumf = 0; + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin); + + const int16x8_t q8sums = vpaddq_s16(vld1q_s16(y[i].bsums), vld1q_s16(y[i].bsums + 8)); + + memcpy(utmp, x[i].scales, 12); + + const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)}; + utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4); + utmp[0] &= kmask1; + + const int16x8_t mins = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(mins8))); + const int32x4_t prod = vaddq_s32(vmull_s16(vget_low_s16 (q8sums), vget_low_s16 (mins)), + vmull_s16(vget_high_s16(q8sums), vget_high_s16(mins))); + sumf -= dmin * vaddvq_s32(prod); + + const uint8_t * scales = (const uint8_t *)utmp; + + const uint8_t * restrict q4 = x[i].qs; + const int8_t * restrict q8 = y[i].qs; + + //int32x4_t isum = mzero; + + int32_t sumi1 = 0; + int32_t sumi2 = 0; + + for (int j = 0; j < QK_K/64; ++j) { + + const uint8x16x2_t q4bits = vld1q_u8_x2(q4); q4 += 32; + +#ifdef __ARM_FEATURE_DOTPROD + q8bytes = vld1q_s8_x2(q8); q8 += 32; + q4bytes.val[0] = vreinterpretq_s8_u8(vandq_u8 (q4bits.val[0], m4b)); + q4bytes.val[1] = vreinterpretq_s8_u8(vandq_u8 (q4bits.val[1], m4b)); + + const int32x4_t p1 = vdotq_s32(vdotq_s32(mzero, q4bytes.val[0], q8bytes.val[0]), q4bytes.val[1], q8bytes.val[1]); + sumi1 += vaddvq_s32(p1) * scales[2*j+0]; + + q8bytes = vld1q_s8_x2(q8); q8 += 32; + q4bytes.val[0] = vreinterpretq_s8_u8(vshrq_n_u8(q4bits.val[0], 4)); + q4bytes.val[1] = vreinterpretq_s8_u8(vshrq_n_u8(q4bits.val[1], 4)); + + const int32x4_t p2 = vdotq_s32(vdotq_s32(mzero, q4bytes.val[0], q8bytes.val[0]), q4bytes.val[1], q8bytes.val[1]); + + sumi2 += vaddvq_s32(p2) * scales[2*j+1]; +#else + q8bytes = vld1q_s8_x2(q8); q8 += 32; + q4bytes.val[0] = vreinterpretq_s8_u8(vandq_u8 (q4bits.val[0], m4b)); + q4bytes.val[1] = vreinterpretq_s8_u8(vandq_u8 (q4bits.val[1], m4b)); + const int16x8_t p0 = vaddq_s16(vmull_s8(vget_low_s8 (q4bytes.val[0]), vget_low_s8 (q8bytes.val[0])), + vmull_s8(vget_high_s8(q4bytes.val[0]), vget_high_s8(q8bytes.val[0]))); + const int16x8_t p1 = vaddq_s16(vmull_s8(vget_low_s8 (q4bytes.val[1]), vget_low_s8 (q8bytes.val[1])), + vmull_s8(vget_high_s8(q4bytes.val[1]), vget_high_s8(q8bytes.val[1]))); + sumi1 += vaddvq_s16(vaddq_s16(p0, p1)) * scales[2*j+0]; + + q8bytes = vld1q_s8_x2(q8); q8 += 32; + q4bytes.val[0] = vreinterpretq_s8_u8(vshrq_n_u8(q4bits.val[0], 4)); + q4bytes.val[1] = vreinterpretq_s8_u8(vshrq_n_u8(q4bits.val[1], 4)); + const int16x8_t p2 = vaddq_s16(vmull_s8(vget_low_s8 (q4bytes.val[0]), vget_low_s8 (q8bytes.val[0])), + vmull_s8(vget_high_s8(q4bytes.val[0]), vget_high_s8(q8bytes.val[0]))); + const int16x8_t p3 = vaddq_s16(vmull_s8(vget_low_s8 (q4bytes.val[1]), vget_low_s8 (q8bytes.val[1])), + vmull_s8(vget_high_s8(q4bytes.val[1]), vget_high_s8(q8bytes.val[1]))); + sumi2 += vaddvq_s16(vaddq_s16(p2, p3)) * scales[2*j+1]; + +#endif + } + + sumf += d * (sumi1 + sumi2); + + } + + *s = sumf; + +#elif defined __AVX2__ + + const __m256i m4 = _mm256_set1_epi8(0xF); + + __m256 acc = _mm256_setzero_ps(); + __m128 acc_m = _mm_setzero_ps(); + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin); + + const uint8_t * restrict q4 = x[i].qs; + const int8_t * restrict q8 = y[i].qs; + + memcpy(utmp, x[i].scales, 12); + utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); + const uint32_t uaux = utmp[1] & kmask1; + utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4); + utmp[2] = uaux; + utmp[0] &= kmask1; + + const __m256i mins_and_scales = _mm256_cvtepu8_epi16(_mm_set_epi32(utmp[3], utmp[2], utmp[1], utmp[0])); + + const __m256i q8sums = _mm256_loadu_si256((const __m256i*)y[i].bsums); + const __m128i q8s = _mm_hadd_epi16(_mm256_extracti128_si256(q8sums, 0), _mm256_extracti128_si256(q8sums, 1)); + const __m128i prod = _mm_madd_epi16(_mm256_extracti128_si256(mins_and_scales, 1), q8s); + acc_m = _mm_fmadd_ps(_mm_set1_ps(dmin), _mm_cvtepi32_ps(prod), acc_m); + + const __m128i sc128 = _mm256_extracti128_si256(mins_and_scales, 0); + const __m256i scales = _mm256_set_m128i(sc128, sc128); + + __m256i sumi = _mm256_setzero_si256(); + + for (int j = 0; j < QK_K/64; ++j) { + + const __m256i scale_l = _mm256_shuffle_epi8(scales, get_scale_shuffle_k4(2*j+0)); + const __m256i scale_h = _mm256_shuffle_epi8(scales, get_scale_shuffle_k4(2*j+1)); + + const __m256i q4bits = _mm256_loadu_si256((const __m256i*)q4); q4 += 32; + const __m256i q4l = _mm256_and_si256(q4bits, m4); + const __m256i q4h = _mm256_and_si256(_mm256_srli_epi16(q4bits, 4), m4); + + const __m256i q8l = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + __m256i p16l = _mm256_maddubs_epi16(q4l, q8l); + p16l = _mm256_madd_epi16(scale_l, p16l); + sumi = _mm256_add_epi32(sumi, p16l); + + const __m256i q8h = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + __m256i p16h = _mm256_maddubs_epi16(q4h, q8h); + p16h = _mm256_madd_epi16(scale_h, p16h); + sumi = _mm256_add_epi32(sumi, p16h); + + } + + __m256 vd = _mm256_set1_ps(d); + acc = _mm256_fmadd_ps(vd, _mm256_cvtepi32_ps(sumi), acc); + + } + + acc_m = _mm_add_ps(acc_m, _mm_movehl_ps(acc_m, acc_m)); + acc_m = _mm_add_ss(acc_m, _mm_movehdup_ps(acc_m)); + + *s = hsum_float_8(acc) + _mm_cvtss_f32(acc_m); + +#else + + + const uint8_t * scales = (const uint8_t*)&utmp[0]; + const uint8_t * mins = (const uint8_t*)&utmp[2]; + + int8_t aux8[QK_K]; + int16_t aux16[8]; + float sums [8]; + int32_t aux32[8]; + memset(sums, 0, 8*sizeof(float)); + + float sumf = 0; + for (int i = 0; i < nb; ++i) { + const uint8_t * restrict q4 = x[i].qs; + const int8_t * restrict q8 = y[i].qs; + memset(aux32, 0, 8*sizeof(int32_t)); + int8_t * restrict a = aux8; + for (int j = 0; j < QK_K/64; ++j) { + for (int l = 0; l < 32; ++l) a[l] = (int8_t)(q4[l] & 0xF); + a += 32; + for (int l = 0; l < 32; ++l) a[l] = (int8_t)(q4[l] >> 4); + a += 32; q4 += 32; + } + memcpy(utmp, x[i].scales, 12); + utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); + const uint32_t uaux = utmp[1] & kmask1; + utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4); + utmp[2] = uaux; + utmp[0] &= kmask1; + + int sumi = 0; + for (int j = 0; j < QK_K/16; ++j) sumi += y[i].bsums[j] * mins[j/2]; + a = aux8; + int is = 0; + for (int j = 0; j < QK_K/32; ++j) { + int32_t scale = scales[is++]; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + } + const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d; + for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; + const float dmin = ggml_fp16_to_fp32(x[i].dmin) * y[i].d; + sumf -= dmin * sumi; + } + for (int l = 0; l < 8; ++l) sumf += sums[l]; + *s = sumf; +#endif +} + +void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + assert(n % QK_K == 0); + + const block_q5_K * restrict x = vx; + const block_q8_K * restrict y = vy; + + const int nb = n / QK_K; + + static const uint32_t kmask1 = 0x3f3f3f3f; + static const uint32_t kmask2 = 0x0f0f0f0f; + static const uint32_t kmask3 = 0x03030303; + + uint32_t utmp[4]; + + +#ifdef __ARM_NEON + + const uint8x16_t m4b = vdupq_n_u8(0xf); + const int32x4_t mzero = vdupq_n_s32(0); + const uint8x16_t mone = vdupq_n_u8(1); + const uint8x16_t mtwo = vdupq_n_u8(2); + + int8x16x4_t q5bytes; + + float sumf = 0; + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin); + + const int16x8_t q8sums = vpaddq_s16(vld1q_s16(y[i].bsums), vld1q_s16(y[i].bsums + 8)); + + memcpy(utmp, x[i].scales, 12); + utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); + const uint32_t uaux = utmp[1] & kmask1; + utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4); + utmp[2] = uaux; + utmp[0] &= kmask1; + + const uint8x8_t mins8 = vld1_u8((const uint8_t*)utmp + 8); + const int16x8_t mins = vreinterpretq_s16_u16(vmovl_u8(mins8)); + const int32x4_t prod = vaddq_s32(vmull_s16(vget_low_s16 (q8sums), vget_low_s16 (mins)), + vmull_s16(vget_high_s16(q8sums), vget_high_s16(mins))); + int32_t sumi_mins = vaddvq_s32(prod); + + const uint8_t * scales = (const uint8_t *)utmp; + + const uint8_t * restrict q5 = x[i].qs; + const uint8_t * restrict qh = x[i].qh; + const int8_t * restrict q8 = y[i].qs; + + uint8x16x2_t qhbits = vld1q_u8_x2(qh); + + uint8x16x4_t q5h; + + int32_t sumi = 0; + + for (int j = 0; j < QK_K/64; ++j) { + + const uint8x16x2_t q5bits = vld1q_u8_x2(q5); q5 += 32; + const int8x16x4_t q8bytes = vld1q_s8_x4(q8); q8 += 64; + + q5h.val[0] = vshlq_n_u8(vandq_u8(mone, qhbits.val[0]), 4); + q5h.val[1] = vshlq_n_u8(vandq_u8(mone, qhbits.val[1]), 4); + q5h.val[2] = vshlq_n_u8(vandq_u8(mtwo, qhbits.val[0]), 3); + q5h.val[3] = vshlq_n_u8(vandq_u8(mtwo, qhbits.val[1]), 3); + qhbits.val[0] = vshrq_n_u8(qhbits.val[0], 2); + qhbits.val[1] = vshrq_n_u8(qhbits.val[1], 2); + + q5bytes.val[0] = vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q5bits.val[0], m4b), q5h.val[0])); + q5bytes.val[1] = vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q5bits.val[1], m4b), q5h.val[1])); + q5bytes.val[2] = vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q5bits.val[0], 4), q5h.val[2])); + q5bytes.val[3] = vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q5bits.val[1], 4), q5h.val[3])); + +#if defined(__ARM_FEATURE_DOTPROD) + + sumi += vaddvq_s32(vdotq_s32(vdotq_s32(mzero, q5bytes.val[0], q8bytes.val[0]), q5bytes.val[1], q8bytes.val[1])) * *scales++; + sumi += vaddvq_s32(vdotq_s32(vdotq_s32(mzero, q5bytes.val[2], q8bytes.val[2]), q5bytes.val[3], q8bytes.val[3])) * *scales++; +#else + + const int16x8_t p0 = vaddq_s16(vmull_s8(vget_low_s8 (q5bytes.val[0]), vget_low_s8 (q8bytes.val[0])), + vmull_s8(vget_high_s8(q5bytes.val[0]), vget_high_s8(q8bytes.val[0]))); + const int16x8_t p1 = vaddq_s16(vmull_s8(vget_low_s8 (q5bytes.val[1]), vget_low_s8 (q8bytes.val[1])), + vmull_s8(vget_high_s8(q5bytes.val[1]), vget_high_s8(q8bytes.val[1]))); + sumi += vaddvq_s16(vaddq_s16(p0, p1)) * *scales++; + + const int16x8_t p2 = vaddq_s16(vmull_s8(vget_low_s8 (q5bytes.val[2]), vget_low_s8 (q8bytes.val[2])), + vmull_s8(vget_high_s8(q5bytes.val[2]), vget_high_s8(q8bytes.val[2]))); + const int16x8_t p3 = vaddq_s16(vmull_s8(vget_low_s8 (q5bytes.val[3]), vget_low_s8 (q8bytes.val[3])), + vmull_s8(vget_high_s8(q5bytes.val[3]), vget_high_s8(q8bytes.val[3]))); + sumi += vaddvq_s16(vaddq_s16(p2, p3)) * *scales++; +#endif + } + + sumf += d * sumi - dmin * sumi_mins; + + } + + *s = sumf; + +#elif defined __AVX2__ + + const __m256i m4 = _mm256_set1_epi8(0xF); + const __m128i mzero = _mm_setzero_si128(); + const __m256i mone = _mm256_set1_epi8(1); + + __m256 acc = _mm256_setzero_ps(); + + float summs = 0.f; + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin); + + const uint8_t * restrict q5 = x[i].qs; + const int8_t * restrict q8 = y[i].qs; + + memcpy(utmp, x[i].scales, 12); + utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); + const uint32_t uaux = utmp[1] & kmask1; + utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4); + utmp[2] = uaux; + utmp[0] &= kmask1; + + const __m256i mins_and_scales = _mm256_cvtepu8_epi16(_mm_set_epi32(utmp[3], utmp[2], utmp[1], utmp[0])); + + const __m256i q8sums = _mm256_loadu_si256((const __m256i*)y[i].bsums); + const __m128i q8s = _mm_hadd_epi16(_mm256_extracti128_si256(q8sums, 0), _mm256_extracti128_si256(q8sums, 1)); + const __m128i prod = _mm_madd_epi16(_mm256_extracti128_si256(mins_and_scales, 1), q8s); + const __m128i hsum = _mm_hadd_epi32(_mm_hadd_epi32(prod, mzero), mzero); + summs += dmin * _mm_extract_epi32(hsum, 0); + + const __m128i sc128 = _mm256_extracti128_si256(mins_and_scales, 0); + const __m256i scales = _mm256_set_m128i(sc128, sc128); + + const __m256i hbits = _mm256_loadu_si256((const __m256i*)x[i].qh); + __m256i hmask = mone; + + __m256i sumi = _mm256_setzero_si256(); + + int bit = 0; + + for (int j = 0; j < QK_K/64; ++j) { + + const __m256i scale_0 = _mm256_shuffle_epi8(scales, get_scale_shuffle_k4(2*j+0)); + const __m256i scale_1 = _mm256_shuffle_epi8(scales, get_scale_shuffle_k4(2*j+1)); + + const __m256i q5bits = _mm256_loadu_si256((const __m256i*)q5); q5 += 32; + + const __m256i q5l_0 = _mm256_and_si256(q5bits, m4); + const __m256i q5h_0 = _mm256_slli_epi16(_mm256_srli_epi16(_mm256_and_si256(hbits, hmask), bit++), 4); + const __m256i q5_0 = _mm256_add_epi8(q5l_0, q5h_0); + hmask = _mm256_slli_epi16(hmask, 1); + + const __m256i q5l_1 = _mm256_and_si256(_mm256_srli_epi16(q5bits, 4), m4); + const __m256i q5h_1 = _mm256_slli_epi16(_mm256_srli_epi16(_mm256_and_si256(hbits, hmask), bit++), 4); + const __m256i q5_1 = _mm256_add_epi8(q5l_1, q5h_1); + hmask = _mm256_slli_epi16(hmask, 1); + + const __m256i q8_0 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_1 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + + __m256i p16_0 = _mm256_maddubs_epi16(q5_0, q8_0); + __m256i p16_1 = _mm256_maddubs_epi16(q5_1, q8_1); + + p16_0 = _mm256_madd_epi16(scale_0, p16_0); + p16_1 = _mm256_madd_epi16(scale_1, p16_1); + + sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p16_0, p16_1)); + + } + + __m256 vd = _mm256_set1_ps(d); + acc = _mm256_fmadd_ps(vd, _mm256_cvtepi32_ps(sumi), acc); + + } + + *s = hsum_float_8(acc) + summs; + +#else + + const uint8_t * scales = (const uint8_t*)&utmp[0]; + const uint8_t * mins = (const uint8_t*)&utmp[2]; + + int8_t aux8[QK_K]; + int16_t aux16[8]; + float sums [8]; + int32_t aux32[8]; + memset(sums, 0, 8*sizeof(float)); + + float sumf = 0; + for (int i = 0; i < nb; ++i) { + const uint8_t * restrict q4 = x[i].qs; + const uint8_t * restrict hm = x[i].qh; + const int8_t * restrict q8 = y[i].qs; + memset(aux32, 0, 8*sizeof(int32_t)); + int8_t * restrict a = aux8; + uint8_t m = 1; + for (int j = 0; j < QK_K/64; ++j) { + for (int l = 0; l < 32; ++l) a[l] = (int8_t)(q4[l] & 0xF); + for (int l = 0; l < 32; ++l) a[l] += (hm[l] & m ? 16 : 0); + a += 32; m <<= 1; + for (int l = 0; l < 32; ++l) a[l] = (int8_t)(q4[l] >> 4); + for (int l = 0; l < 32; ++l) a[l] += (hm[l] & m ? 16 : 0); + a += 32; m <<= 1; + q4 += 32; + } + memcpy(utmp, x[i].scales, 12); + utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); + const uint32_t uaux = utmp[1] & kmask1; + utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4); + utmp[2] = uaux; + utmp[0] &= kmask1; + + int sumi = 0; + for (int j = 0; j < QK_K/16; ++j) sumi += y[i].bsums[j] * mins[j/2]; + a = aux8; + int is = 0; + for (int j = 0; j < QK_K/32; ++j) { + int32_t scale = scales[is++]; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + } + const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d; + for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; + const float dmin = ggml_fp16_to_fp32(x[i].dmin) * y[i].d; + sumf -= dmin * sumi; + } + for (int l = 0; l < 8; ++l) sumf += sums[l]; + *s = sumf; +#endif +} + + + +void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { + assert(n % QK_K == 0); + + const block_q6_K * restrict x = vx; + const block_q8_K * restrict y = vy; + + const int nb = n / QK_K; + +#ifdef __ARM_NEON + + float sum = 0; + + const uint8x16_t m4b = vdupq_n_u8(0xF); + const int32x4_t vzero = vdupq_n_s32(0); + //const int8x16_t m32s = vdupq_n_s8(32); + + const uint8x16_t mone = vdupq_n_u8(3); + + int8x16x4_t q6bytes; + uint8x16x4_t q6h; + + for (int i = 0; i < nb; ++i) { + + const float d_all = ggml_fp16_to_fp32(x[i].d); + + const uint8_t * restrict q6 = x[i].ql; + const uint8_t * restrict qh = x[i].qh; + const int8_t * restrict q8 = y[i].qs; + + const int8_t * restrict scale = x[i].scales; + + const int16x8x2_t q8sums = vld1q_s16_x2(y[i].bsums); + const int8x16_t scales = vld1q_s8(scale); + const int16x8x2_t q6scales = {vmovl_s8(vget_low_s8(scales)), vmovl_s8(vget_high_s8(scales))}; + + const int32x4_t prod = vaddq_s32(vaddq_s32(vmull_s16(vget_low_s16 (q8sums.val[0]), vget_low_s16 (q6scales.val[0])), + vmull_s16(vget_high_s16(q8sums.val[0]), vget_high_s16(q6scales.val[0]))), + vaddq_s32(vmull_s16(vget_low_s16 (q8sums.val[1]), vget_low_s16 (q6scales.val[1])), + vmull_s16(vget_high_s16(q8sums.val[1]), vget_high_s16(q6scales.val[1])))); + int32_t isum_mins = vaddvq_s32(prod); + + int32_t isum = 0; + + for (int j = 0; j < QK_K/128; ++j) { + + uint8x16x2_t qhbits = vld1q_u8_x2(qh); qh += 32; + uint8x16x4_t q6bits = vld1q_u8_x4(q6); q6 += 64; + int8x16x4_t q8bytes = vld1q_s8_x4(q8); q8 += 64; + + q6h.val[0] = vshlq_n_u8(vandq_u8(mone, qhbits.val[0]), 4); + q6h.val[1] = vshlq_n_u8(vandq_u8(mone, qhbits.val[1]), 4); + uint8x16_t shifted = vshrq_n_u8(qhbits.val[0], 2); + q6h.val[2] = vshlq_n_u8(vandq_u8(mone, shifted), 4); + shifted = vshrq_n_u8(qhbits.val[1], 2); + q6h.val[3] = vshlq_n_u8(vandq_u8(mone, shifted), 4); + + //q6bytes.val[0] = vsubq_s8(vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q6bits.val[0], m4b), q6h.val[0])), m32s); + //q6bytes.val[1] = vsubq_s8(vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q6bits.val[1], m4b), q6h.val[1])), m32s); + //q6bytes.val[2] = vsubq_s8(vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q6bits.val[2], m4b), q6h.val[2])), m32s); + //q6bytes.val[3] = vsubq_s8(vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q6bits.val[3], m4b), q6h.val[3])), m32s); + q6bytes.val[0] = vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q6bits.val[0], m4b), q6h.val[0])); + q6bytes.val[1] = vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q6bits.val[1], m4b), q6h.val[1])); + q6bytes.val[2] = vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q6bits.val[2], m4b), q6h.val[2])); + q6bytes.val[3] = vreinterpretq_s8_u8(vorrq_u8(vandq_u8(q6bits.val[3], m4b), q6h.val[3])); + +#if defined(__ARM_FEATURE_DOTPROD) + + isum += vaddvq_s32(vdotq_s32(vzero, q6bytes.val[0], q8bytes.val[0])) * scale[0] + + vaddvq_s32(vdotq_s32(vzero, q6bytes.val[1], q8bytes.val[1])) * scale[1] + + vaddvq_s32(vdotq_s32(vzero, q6bytes.val[2], q8bytes.val[2])) * scale[2] + + vaddvq_s32(vdotq_s32(vzero, q6bytes.val[3], q8bytes.val[3])) * scale[3]; + scale += 4; + +#else + + int16x8_t p0 = vaddq_s16(vmull_s8(vget_low_s8 (q6bytes.val[0]), vget_low_s8 (q8bytes.val[0])), + vmull_s8(vget_high_s8(q6bytes.val[0]), vget_high_s8(q8bytes.val[0]))); + int16x8_t p1 = vaddq_s16(vmull_s8(vget_low_s8 (q6bytes.val[1]), vget_low_s8 (q8bytes.val[1])), + vmull_s8(vget_high_s8(q6bytes.val[1]), vget_high_s8(q8bytes.val[1]))); + isum += vaddvq_s16(p0) * scale[0] + vaddvq_s16(p1) * scale[1]; + scale += 2; + + int16x8_t p2 = vaddq_s16(vmull_s8(vget_low_s8 (q6bytes.val[2]), vget_low_s8 (q8bytes.val[2])), + vmull_s8(vget_high_s8(q6bytes.val[2]), vget_high_s8(q8bytes.val[2]))); + int16x8_t p3 = vaddq_s16(vmull_s8(vget_low_s8 (q6bytes.val[3]), vget_low_s8 (q8bytes.val[3])), + vmull_s8(vget_high_s8(q6bytes.val[3]), vget_high_s8(q8bytes.val[3]))); + isum += vaddvq_s16(p2) * scale[0] + vaddvq_s16(p3) * scale[1]; + scale += 2; +#endif + + q8bytes = vld1q_s8_x4(q8); q8 += 64; + + shifted = vshrq_n_u8(qhbits.val[0], 4); + q6h.val[0] = vshlq_n_u8(vandq_u8(mone, shifted), 4); + shifted = vshrq_n_u8(qhbits.val[1], 4); + q6h.val[1] = vshlq_n_u8(vandq_u8(mone, shifted), 4); + shifted = vshrq_n_u8(qhbits.val[0], 6); + q6h.val[2] = vshlq_n_u8(vandq_u8(mone, shifted), 4); + shifted = vshrq_n_u8(qhbits.val[1], 6); + q6h.val[3] = vshlq_n_u8(vandq_u8(mone, shifted), 4); + + //q6bytes.val[0] = vsubq_s8(vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q6bits.val[0], 4), q6h.val[0])), m32s); + //q6bytes.val[1] = vsubq_s8(vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q6bits.val[1], 4), q6h.val[1])), m32s); + //q6bytes.val[2] = vsubq_s8(vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q6bits.val[2], 4), q6h.val[2])), m32s); + //q6bytes.val[3] = vsubq_s8(vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q6bits.val[3], 4), q6h.val[3])), m32s); + q6bytes.val[0] = vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q6bits.val[0], 4), q6h.val[0])); + q6bytes.val[1] = vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q6bits.val[1], 4), q6h.val[1])); + q6bytes.val[2] = vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q6bits.val[2], 4), q6h.val[2])); + q6bytes.val[3] = vreinterpretq_s8_u8(vorrq_u8(vshrq_n_u8(q6bits.val[3], 4), q6h.val[3])); + +#if defined(__ARM_FEATURE_DOTPROD) + + isum += vaddvq_s32(vdotq_s32(vzero, q6bytes.val[0], q8bytes.val[0])) * scale[0] + + vaddvq_s32(vdotq_s32(vzero, q6bytes.val[1], q8bytes.val[1])) * scale[1] + + vaddvq_s32(vdotq_s32(vzero, q6bytes.val[2], q8bytes.val[2])) * scale[2] + + vaddvq_s32(vdotq_s32(vzero, q6bytes.val[3], q8bytes.val[3])) * scale[3]; + scale += 4; + + //for (int l = 0; l < 4; ++l) { + // const int32x4_t p = vdotq_s32(vzero, q6bytes.val[l], q8bytes.val[l]); + // isum += vaddvq_s32(p) * *scale++; + //} +#else + p0 = vaddq_s16(vmull_s8(vget_low_s8 (q6bytes.val[0]), vget_low_s8 (q8bytes.val[0])), + vmull_s8(vget_high_s8(q6bytes.val[0]), vget_high_s8(q8bytes.val[0]))); + p1 = vaddq_s16(vmull_s8(vget_low_s8 (q6bytes.val[1]), vget_low_s8 (q8bytes.val[1])), + vmull_s8(vget_high_s8(q6bytes.val[1]), vget_high_s8(q8bytes.val[1]))); + isum += vaddvq_s16(p0) * scale[0] + vaddvq_s16(p1) * scale[1]; + scale += 2; + + p2 = vaddq_s16(vmull_s8(vget_low_s8 (q6bytes.val[2]), vget_low_s8 (q8bytes.val[2])), + vmull_s8(vget_high_s8(q6bytes.val[2]), vget_high_s8(q8bytes.val[2]))); + p3 = vaddq_s16(vmull_s8(vget_low_s8 (q6bytes.val[3]), vget_low_s8 (q8bytes.val[3])), + vmull_s8(vget_high_s8(q6bytes.val[3]), vget_high_s8(q8bytes.val[3]))); + isum += vaddvq_s16(p2) * scale[0] + vaddvq_s16(p3) * scale[1]; + scale += 2; +#endif + + } + //sum += isum * d_all * y[i].d; + sum += d_all * y[i].d * (isum - 32 * isum_mins); + + } + *s = sum; + +#elif defined __AVX2__ + + const __m256i m4 = _mm256_set1_epi8(0xF); + const __m256i m2 = _mm256_set1_epi8(3); + const __m256i m32s = _mm256_set1_epi8(32); + + __m256 acc = _mm256_setzero_ps(); + + for (int i = 0; i < nb; ++i) { + + const float d = y[i].d * ggml_fp16_to_fp32(x[i].d); + + const uint8_t * restrict q4 = x[i].ql; + const uint8_t * restrict qh = x[i].qh; + const int8_t * restrict q8 = y[i].qs; + + const __m128i scales = _mm_loadu_si128((const __m128i*)x[i].scales); + + __m256i sumi = _mm256_setzero_si256(); + + int is = 0; + + for (int j = 0; j < QK_K/128; ++j) { + + const __m128i scale_0 = _mm_shuffle_epi8(scales, get_scale_shuffle(is + 0)); + const __m128i scale_1 = _mm_shuffle_epi8(scales, get_scale_shuffle(is + 1)); + const __m128i scale_2 = _mm_shuffle_epi8(scales, get_scale_shuffle(is + 2)); + const __m128i scale_3 = _mm_shuffle_epi8(scales, get_scale_shuffle(is + 3)); + is += 4; + + const __m256i q4bits1 = _mm256_loadu_si256((const __m256i*)q4); q4 += 32; + const __m256i q4bits2 = _mm256_loadu_si256((const __m256i*)q4); q4 += 32; + const __m256i q4bitsH = _mm256_loadu_si256((const __m256i*)qh); qh += 32; + + const __m256i q4h_0 = _mm256_slli_epi16(_mm256_and_si256(q4bitsH, m2), 4); + const __m256i q4h_1 = _mm256_slli_epi16(_mm256_and_si256(_mm256_srli_epi16(q4bitsH, 2), m2), 4); + const __m256i q4h_2 = _mm256_slli_epi16(_mm256_and_si256(_mm256_srli_epi16(q4bitsH, 4), m2), 4); + const __m256i q4h_3 = _mm256_slli_epi16(_mm256_and_si256(_mm256_srli_epi16(q4bitsH, 6), m2), 4); + + const __m256i q4_0 = _mm256_or_si256(_mm256_and_si256(q4bits1, m4), q4h_0); + const __m256i q4_1 = _mm256_or_si256(_mm256_and_si256(q4bits2, m4), q4h_1); + const __m256i q4_2 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi16(q4bits1, 4), m4), q4h_2); + const __m256i q4_3 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi16(q4bits2, 4), m4), q4h_3); + + const __m256i q8_0 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_1 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_2 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8_3 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + + __m256i q8s_0 = _mm256_maddubs_epi16(m32s, q8_0); + __m256i q8s_1 = _mm256_maddubs_epi16(m32s, q8_1); + __m256i q8s_2 = _mm256_maddubs_epi16(m32s, q8_2); + __m256i q8s_3 = _mm256_maddubs_epi16(m32s, q8_3); + + __m256i p16_0 = _mm256_maddubs_epi16(q4_0, q8_0); + __m256i p16_1 = _mm256_maddubs_epi16(q4_1, q8_1); + __m256i p16_2 = _mm256_maddubs_epi16(q4_2, q8_2); + __m256i p16_3 = _mm256_maddubs_epi16(q4_3, q8_3); + + p16_0 = _mm256_sub_epi16(p16_0, q8s_0); + p16_1 = _mm256_sub_epi16(p16_1, q8s_1); + p16_2 = _mm256_sub_epi16(p16_2, q8s_2); + p16_3 = _mm256_sub_epi16(p16_3, q8s_3); + + p16_0 = _mm256_madd_epi16(_mm256_cvtepi8_epi16(scale_0), p16_0); + p16_1 = _mm256_madd_epi16(_mm256_cvtepi8_epi16(scale_1), p16_1); + p16_2 = _mm256_madd_epi16(_mm256_cvtepi8_epi16(scale_2), p16_2); + p16_3 = _mm256_madd_epi16(_mm256_cvtepi8_epi16(scale_3), p16_3); + + sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p16_0, p16_1)); + sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p16_2, p16_3)); + + } + + acc = _mm256_fmadd_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(sumi), acc); + } + + *s = hsum_float_8(acc); + +#else + + int8_t aux8[QK_K]; + int16_t aux16[8]; + float sums [8]; + int32_t aux32[8]; + memset(sums, 0, 8*sizeof(float)); + + float sumf = 0; + for (int i = 0; i < nb; ++i) { + const uint8_t * restrict q4 = x[i].ql; + const uint8_t * restrict qh = x[i].qh; + const int8_t * restrict q8 = y[i].qs; + memset(aux32, 0, 8*sizeof(int32_t)); + int8_t * restrict a = aux8; + for (int j = 0; j < QK_K; j += 128) { + for (int l = 0; l < 32; ++l) { + a[l + 0] = (int8_t)((q4[l + 0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32; + a[l + 32] = (int8_t)((q4[l + 32] & 0xF) | (((qh[l] >> 2) & 3) << 4)) - 32; + a[l + 64] = (int8_t)((q4[l + 0] >> 4) | (((qh[l] >> 4) & 3) << 4)) - 32; + a[l + 96] = (int8_t)((q4[l + 32] >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32; + } + a += 128; + q4 += 64; + qh += 32; + } + a = aux8; + int is = 0; + for (int j = 0; j < QK_K/16; ++j) { + int scale = x[i].scales[is++]; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + for (int l = 0; l < 8; ++l) aux16[l] = q8[l] * a[l]; + for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; + q8 += 8; a += 8; + } + const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d; + for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; + } + for (int l = 0; l < 8; ++l) sumf += sums[l]; + *s = sumf; +#endif +} diff --git a/ctransformers/models/ggml/k_quants.h b/ctransformers/models/ggml/k_quants.h new file mode 100644 index 0000000000000000000000000000000000000000..10a0baac73a078f459ce0c21302bf79bd796cec2 --- /dev/null +++ b/ctransformers/models/ggml/k_quants.h @@ -0,0 +1,122 @@ +#pragma once + +#include "ggml.h" + +#include +#include +#include + +// Super-block size +#define QK_K 256 + +// +// Super-block quantization structures +// + +// 2-bit quantization +// weight is represented as x = a * q + b +// 16 blocks of 16 elemenets each +// Effectively 2.5625 bits per weight +typedef struct { + uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits + uint8_t qs[QK_K/4]; // quants + ggml_fp16_t d; // super-block scale for quantized scales + ggml_fp16_t dmin; // super-block scale for quantized mins +} block_q2_K; +static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding"); + +// 3-bit quantization +// weight is represented as x = a * q +// 16 blocks of 16 elemenets each +// Effectively 3.4375 bits per weight +typedef struct { + uint8_t hmask[QK_K/8]; // quants - high bit + uint8_t qs[QK_K/4]; // quants - low 2 bits + uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits + ggml_fp16_t d; // super-block scale +} block_q3_K; +static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + 11 * QK_K / 64, "wrong q3_K block size/padding"); + +// 4-bit quantization +// 16 blocks of 32 elements each +// weight is represented as x = a * q + b +// Effectively 4.5 bits per weight +typedef struct { + ggml_fp16_t d; // super-block scale for quantized scales + ggml_fp16_t dmin; // super-block scale for quantized mins + uint8_t scales[3*QK_K/64]; // scales and mins, quantized with 6 bits + uint8_t qs[QK_K/2]; // 4--bit quants +} block_q4_K; +static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding"); + +// 5-bit quantization +// 16 blocks of 32 elements each +// weight is represented as x = a * q + b +// Effectively 5.5 bits per weight +typedef struct { + ggml_fp16_t d; // super-block scale for quantized scales + ggml_fp16_t dmin; // super-block scale for quantized mins + uint8_t scales[3*QK_K/64]; // scales and mins, quantized with 6 bits + uint8_t qh[QK_K/8]; // quants, high bit + uint8_t qs[QK_K/2]; // quants, low 4 bits +} block_q5_K; +static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2 + QK_K/8, "wrong q5_K block size/padding"); + +// 6-bit quantization +// weight is represented as x = a * q +// 16 blocks of 16 elemenets each +// Effectively 6.5625 bits per weight +typedef struct { + uint8_t ql[QK_K/2]; // quants, lower 4 bits + uint8_t qh[QK_K/4]; // quants, upper 2 bits + int8_t scales[QK_K/16]; // scales, quantized with 8 bits + ggml_fp16_t d; // super-block scale +} block_q6_K; +static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding"); + +// This is only used for intermediate quantization and dot products +typedef struct { + float d; // delta + int8_t qs[QK_K]; // quants + int16_t bsums[QK_K/16]; // sum of quants in groups of 16 +} block_q8_K; +static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding"); + + +// Quantization +void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k); +void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k); +void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k); +void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k); +void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k); +void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k); + +void quantize_row_q2_K(const float * restrict x, void * restrict y, int k); +void quantize_row_q3_K(const float * restrict x, void * restrict y, int k); +void quantize_row_q4_K(const float * restrict x, void * restrict y, int k); +void quantize_row_q5_K(const float * restrict x, void * restrict y, int k); +void quantize_row_q6_K(const float * restrict x, void * restrict y, int k); +void quantize_row_q8_K(const float * restrict x, void * restrict y, int k); + +// Dequantization +void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k); +void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k); +void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k); +void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k); +void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k); +void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k); + +// Dot product +void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); +void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); +void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); +void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); +void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy); + +// Quantization with histogram collection +size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist); +size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist); +size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist); +size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist); +size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist); + diff --git a/ctransformers/models/ggml/libfalcon.cpp b/ctransformers/models/ggml/libfalcon.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9bbe8d88c563869d33ed43ee7ed1f654e9f39658 --- /dev/null +++ b/ctransformers/models/ggml/libfalcon.cpp @@ -0,0 +1,3339 @@ +// Defines fileno on msys: +// inference&model based on the ggml falcon example PR from +// https://github.com/KerfuffleV2/ggml-falcon +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#include +#include +#include +#endif + +#include "ggml.h" +#include "libfalcon.h" +#include "llama-util.h" +#ifdef GGML_USE_CUBLAS +#include + +#include "ggml-cuda.h" +#elif defined(GGML_USE_CLBLAST) +#include "ggml-opencl.h" +#endif + +#ifdef GGML_USE_METAL +#include "ggml-metal.h" +#endif +#ifndef QK_K +#define QK_K 256 +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +// disable "possible loss of data" +#pragma warning(disable : 4244 4267) +#endif + +#define LLAMA_USE_SCRATCH +#define LLAMA_MAX_SCRATCH_BUFFERS 16 + +// available falcon models +enum falcon_e_model { + FALCON_UNKNOWN, + FALCON_7B, + FALCON_40B, +}; + +// computed for n_ctx == 2048 +// TODO: dynamically determine these sizes +// needs modifications in ggml + +typedef void (*offload_func_t)(struct ggml_tensor *tensor); + +static const std::map &FALCON_MEM_REQ_SCRATCH0() { + static std::map k_sizes = { + {FALCON_7B, 512ull * MB}, + {FALCON_40B, 1024ull * MB}, + }; + return k_sizes; +} + +static const std::map &FALCON_MEM_REQ_SCRATCH1() { + static std::map k_sizes = { + {FALCON_7B, 512ull * MB}, + {FALCON_40B, 1024ull * MB}, + }; + return k_sizes; +} + +// this is mostly needed for temporary mul_mat buffers to dequantize the data +// not actually needed if BLAS is disabled +static const std::map &FALCON_MEM_REQ_EVAL() { + static std::map k_sizes = { + {FALCON_7B, 768ull * MB}, + {FALCON_40B, 1536ull * MB}, + }; + return k_sizes; +} + +// default hparams (Falcon 7B) +struct falcon_hparams { + int32_t n_vocab = 65024; + int32_t n_ctx = 2048; + int32_t n_embd = 4544; + int32_t n_head = 71; + int32_t n_head_kv = 1; + int32_t n_layer = 32; + int32_t n_falcon_type = 7; // 7 for Falcon-7B, 40 for Falcon-40B + enum llama_ftype ftype = LLAMA_FTYPE_MOSTLY_F16; + + bool operator!=(const falcon_hparams &other) const { + return static_cast(memcmp(this, &other, sizeof(falcon_hparams))); + } +}; + +static size_t FALCON_MEM_REQ_KV_SELF(const falcon_hparams &hparams, + ggml_type wtype, int32_t n_ctx) { + const int n_head_kv = hparams.n_head_kv; + const int head_dim = hparams.n_embd / hparams.n_head; + const int n_layer = hparams.n_layer; + + const int64_t ne = n_head_kv * head_dim * n_layer * n_ctx; + + return 2u * (ggml_tensor_overhead() + ne * ggml_type_size(wtype)); +} + +struct falcon_layer { + // normalization + struct ggml_tensor *input_layernorm; + struct ggml_tensor *input_layernorm_b; + struct ggml_tensor *attention_norm; // Falcon-40B only + struct ggml_tensor *attention_norm_b; // Falcon-40B only + + // attention + struct ggml_tensor *query_key_value; + struct ggml_tensor *wo; + + // ff + struct ggml_tensor *ffn_up; + struct ggml_tensor *ffn_down; +}; + +struct falcon_kv_cache { + struct ggml_tensor *k; + struct ggml_tensor *v; + + struct ggml_context *ctx = NULL; + + llama_ctx_buffer buf; + + int n; // number of tokens currently in the cache + + ~falcon_kv_cache() { + if (ctx) { + ggml_free(ctx); + } + } +}; + +struct falcon_model { + falcon_e_model type = FALCON_UNKNOWN; + + falcon_hparams hparams; + + struct ggml_tensor *tok_embeddings; + struct ggml_tensor *output_norm; + struct ggml_tensor *output_norm_b; + struct ggml_tensor *lm_head; + // struct ggml_tensor* output; + + std::vector layers; + + int n_gpu_layers; + int i_gpu_start; + int i_gpu_last; + + // context + struct ggml_context *ctx = NULL; + std::map tensors; + + // key + value cache for the self attention + // TODO: move to llama_state + struct falcon_kv_cache kv_self; + + // the model memory buffer + llama_ctx_buffer buf; + + // model memory mapped file + std::unique_ptr mapping; + + // objects representing data potentially being locked in memory + llama_mlock mlock_buf; + llama_mlock mlock_mmap; + + // for quantize-stats only + std::vector> tensors_by_name; + + ~falcon_model() { + if (ctx) { + ggml_free(ctx); + } + +#ifdef GGML_USE_CUBLAS + for (size_t i = 0; i < tensors_by_name.size(); ++i) { + ggml_cuda_free_data(tensors_by_name[i].second); + } +#elif defined(GGML_USE_CLBLAST) + for (size_t i = 0; i < tensors_by_name.size(); ++i) { + ggml_cl_free_data(tensors_by_name[i].second); + } +#endif + } +}; + +struct falcon_context { + std::mt19937 rng; + + int64_t t_load_us = 0; + int64_t t_start_us = 0; + bool has_evaluated_once = false; + + int64_t t_sample_us = 0; + int64_t t_eval_us = 0; + int64_t t_p_eval_us = 0; + + int32_t n_sample = 0; // number of tokens sampled + int32_t n_eval = 0; // number of eval calls + int32_t n_p_eval = + 0; // number of tokens in eval calls for the prompt (with batch size > 1) + + falcon_model model; + llama_vocab vocab; + + size_t mem_per_token = 0; + + // decode output (2-dimensional array: [n_tokens][n_vocab]) + std::vector logits; + bool logits_all = false; + + // input embedding (1-dimensional array: [n_embd]) + std::vector embedding; + + // memory buffers used to evaluate the model + // TODO: move in llama_state + llama_ctx_buffer buf_compute; + llama_ctx_buffer buf_scratch[LLAMA_MAX_SCRATCH_BUFFERS]; + +#ifdef GGML_USE_METAL + ggml_metal_context *ctx_metal = NULL; +#endif + + int buf_last = 0; + size_t buf_max_size[LLAMA_MAX_SCRATCH_BUFFERS] = {0}; + + void use_buf(struct ggml_context *ctx, int i) { +#if defined(LLAMA_USE_SCRATCH) + size_t last_size = 0; + + if (i == -1) { + last_size = ggml_set_scratch(ctx, { + 0, + 0, + nullptr, + }); + } else { + auto &buf = buf_scratch[i]; + last_size = ggml_set_scratch(ctx, { + 0, + buf.size, + buf.addr, + }); + } + + if (buf_last >= 0) { + buf_max_size[buf_last] = std::max(buf_max_size[buf_last], last_size); + } + + buf_last = i; +#else + (void)i; + (void)ctx; +#endif + } + + size_t get_buf_max_mem(int i) const { +#if defined(LLAMA_USE_SCRATCH) + return buf_max_size[i]; +#else + (void)i; + return 0; +#endif + } +}; + +struct falcon_file_loader { + llama_file file; + llama_file_version file_version; + falcon_hparams hparams; + llama_vocab vocab; + + falcon_file_loader(const char *fname, size_t file_idx, + llama_load_tensors_map &tensors_map) + : file(fname, "rb") { + read_magic(); + if (file_version <= 1) + fprintf(stderr, + "falcon.cpp: file version %d - Use falcon_quantize to update " + "version and improve performance\n", + file_version); + read_hparams(); + read_vocab(); + read_tensor_metadata(file_idx, tensors_map); + } + void read_magic() { + uint32_t magic = file.read_u32(); + + if (magic == LLAMA_FILE_MAGIC_GGML) { + file_version = LLAMA_FILE_VERSION_GGML; + return; + } + + uint32_t version = file.read_u32(); + + switch (magic) { + case LLAMA_FILE_MAGIC_GGMF: + switch (version) { + case 1: + file_version = LLAMA_FILE_VERSION_GGMF_V1; + return; + } + break; + case LLAMA_FILE_MAGIC_GGJT: + switch (version) { + case 1: + file_version = LLAMA_FILE_VERSION_GGJT_V1; + return; + case 2: + file_version = LLAMA_FILE_VERSION_GGJT_V2; + return; + case 3: + file_version = LLAMA_FILE_VERSION_GGJT_V3; + return; + } + } + + throw std::runtime_error( + format("unknown (magic, version) combination: %08x, %08x; is this " + "really a GGML file?", + magic, version)); + } + void read_hparams() { + hparams.n_vocab = file.read_u32(); + hparams.n_embd = file.read_u32(); + hparams.n_head = file.read_u32(); + hparams.n_head_kv = file.read_u32(); + hparams.n_layer = file.read_u32(); + hparams.n_falcon_type = file.read_u32(); + if (file_version == LLAMA_FILE_VERSION_GGML) { + int32_t ftype = file.read_u32(); + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + hparams.ftype = + (enum llama_ftype)(hparams.ftype % GGML_QNT_VERSION_FACTOR); + } else { + hparams.ftype = (enum llama_ftype)file.read_u32(); + } + + // hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + void read_vocab() { + vocab.id_to_token.resize(hparams.n_vocab); + + for (uint32_t i = 0; i < (uint32_t)hparams.n_vocab; i++) { + uint32_t len = file.read_u32(); + std::string word = file.read_string(len); + + float score = 0.0f; // flacon does not have scores in vocab, scores are a + // sentencepiece addition + if (file_version >= LLAMA_FILE_VERSION_GGMF_V1) { + file.read_raw(&score, sizeof(score)); + } + + vocab.token_to_id[word] = i; + + auto &tok_score = vocab.id_to_token[i]; + tok_score.tok = std::move(word); + tok_score.score = score; + } + } + void read_tensor_metadata(size_t file_idx, + llama_load_tensors_map &tensors_map) { + while (file.tell() < file.size) { + llama_load_tensor_shard shard; + uint32_t n_dims = file.read_u32(); + uint32_t name_len = file.read_u32(); + shard.type = (enum ggml_type)file.read_u32(); + shard.ne.resize(n_dims); + file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims); + std::string name = file.read_string(name_len); + if (n_dims < 1 || n_dims > 2) { + throw std::runtime_error( + format("falcon.cpp: tensor '%s' should not be %u-dimensional", + name.c_str(), n_dims)); + } + switch (shard.type) { + case GGML_TYPE_F32: + case GGML_TYPE_F16: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + break; + default: { + throw std::runtime_error( + format("unrecognized tensor type %u\n", shard.type)); + } + } + + if (file_version >= LLAMA_FILE_VERSION_GGJT_V1) { + // skip to the next multiple of 32 bytes + file.seek(-static_cast(file.tell()) & 31, SEEK_CUR); + } + shard.file_idx = file_idx; + shard.file_off = file.tell(); + + shard.calc_size(); + file.seek(shard.size, SEEK_CUR); + + auto it = tensors_map.name_to_idx.find(name); + size_t idx; + if (it != tensors_map.name_to_idx.end()) { + idx = it->second; + } else { + tensors_map.tensors.emplace_back(name); + idx = tensors_map.tensors.size() - 1; + tensors_map.name_to_idx.emplace(name, idx); + } + tensors_map.tensors.at(idx).shards.push_back(shard); + } + } +}; + +struct falcon_file_saver { + llama_file file; + falcon_file_loader *any_file_loader; + falcon_file_saver(const char *fname, falcon_file_loader *any_file_loader, + enum llama_ftype new_ftype) + : file(fname, "wb"), any_file_loader(any_file_loader) { + fprintf(stderr, "falcon.cpp: saving model to %s\n", fname); + write_magic(); + write_hparams(new_ftype); + write_vocab(); + } + void write_magic() { + file.write_u32(LLAMA_FILE_MAGIC); // magic + file.write_u32(LLAMA_FILE_VERSION); // version + } + void write_hparams(enum llama_ftype new_ftype) { + const falcon_hparams &hparams = any_file_loader->hparams; + file.write_u32(hparams.n_vocab); + file.write_u32(hparams.n_embd); + file.write_u32(hparams.n_head); + file.write_u32(hparams.n_head_kv); + file.write_u32(hparams.n_layer); + file.write_u32(hparams.n_falcon_type); + file.write_u32(new_ftype); + } + void write_vocab() { + if (any_file_loader->file_version == LLAMA_FILE_VERSION_GGML) { + fprintf(stderr, + "falcon.cpp: WARNING: input is an old file that doesn't have " + "scores; will add dummy scores\n"); + } + uint32_t n_vocab = any_file_loader->hparams.n_vocab; + for (uint32_t i = 0; i < n_vocab; i++) { + const auto &token_score = any_file_loader->vocab.id_to_token.at(i); + file.write_u32((uint32_t)token_score.tok.size()); + file.write_raw(token_score.tok.data(), token_score.tok.size()); + file.write_raw(&token_score.score, sizeof(token_score.score)); + } + } + void write_tensor(llama_load_tensor &tensor, enum ggml_type new_type, + const void *new_data, size_t new_size) { + switch (new_type) { + case GGML_TYPE_F32: + case GGML_TYPE_F16: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + break; + default: + LLAMA_ASSERT(false); + } + file.write_u32((uint32_t)tensor.ne.size()); + file.write_u32((uint32_t)tensor.name.size()); + file.write_u32(new_type); + file.write_raw(tensor.ne.data(), sizeof(tensor.ne[0]) * tensor.ne.size()); + file.write_raw(tensor.name.data(), tensor.name.size()); + file.seek(-static_cast(file.tell()) & 31, SEEK_CUR); + LLAMA_ASSERT(new_size == llama_calc_tensor_size(tensor.ne, new_type)); + file.write_raw(new_data, new_size); + } +}; + +struct falcon_model_loader { + std::vector> file_loaders; + llama_load_tensors_map tensors_map; + bool use_mmap; + size_t num_ggml_tensors_created = 0; + struct ggml_context *ggml_ctx = NULL; + std::unique_ptr mapping; + + falcon_model_loader(const std::string &fname_base, bool use_mmap, + bool vocab_only) { + auto *first_file = + new falcon_file_loader(fname_base.c_str(), 0, tensors_map); + file_loaders.emplace_back(first_file); + uint32_t n_parts = vocab_only ? 1 : guess_n_parts(); + for (uint32_t i = 1; i < n_parts; i++) { + std::string fname = fname_base + "." + std::to_string(i); + auto *ith_file = new falcon_file_loader(fname.c_str(), i, tensors_map); + file_loaders.emplace_back(ith_file); + if (ith_file->hparams != first_file->hparams) { + throw std::runtime_error( + format("falcon.cpp: hparams inconsistent between files")); + } + } + if (!llama_mmap::SUPPORTED) { + use_mmap = false; + } + if (use_mmap && alignment_prevents_mmap()) { + fprintf(stderr, + "falcon.cpp: can't use mmap because tensors are not aligned; " + "convert to new format to avoid this\n"); + use_mmap = false; + } + this->use_mmap = use_mmap; + for (llama_load_tensor < : tensors_map.tensors) { + lt.calc_all(); + } + } + + bool alignment_prevents_mmap() { + for (const llama_load_tensor < : tensors_map.tensors) { + for (const llama_load_tensor_shard &shard : lt.shards) { + if (shard.file_off & 3) { + return true; + } + } + } + return false; + } + + uint32_t guess_n_parts() const { + auto it = + tensors_map.name_to_idx.find("transformer.word_embeddings.weight"); + if (it == tensors_map.name_to_idx.end()) { + throw std::runtime_error(std::string("missing tok_embeddings.weight")); + } + const llama_load_tensor < = tensors_map.tensors.at(it->second); + return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0); + } + + void calc_sizes(size_t *ctx_size_p, size_t *mmapped_size_p) const { + *ctx_size_p = *mmapped_size_p = 0; + for (const llama_load_tensor < : tensors_map.tensors) { + *ctx_size_p += ggml_tensor_overhead(); + *(use_mmap ? mmapped_size_p : ctx_size_p) += lt.size; + } + } + + struct ggml_tensor *get_tensor(const std::string &name, + const std::vector &ne, + ggml_backend backend) { + auto it = tensors_map.name_to_idx.find(name); + if (it == tensors_map.name_to_idx.end()) { + throw std::runtime_error(std::runtime_error(format( + "falcon.cpp: tensor '%s' is missing from model", name.c_str()))); + } + llama_load_tensor < = tensors_map.tensors.at(it->second); + if (lt.ne != ne) { + throw std::runtime_error( + format("falcon.cpp: tensor '%s' has wrong shape; expected %s, got %s", + name.c_str(), llama_format_tensor_shape(ne).c_str(), + llama_format_tensor_shape(lt.ne).c_str())); + } + + return get_tensor_for(lt, backend); + } + + struct ggml_tensor *get_tensor_for(llama_load_tensor <, + ggml_backend backend) { + struct ggml_tensor *tensor; + if (backend != GGML_BACKEND_CPU) { + ggml_set_no_alloc(ggml_ctx, true); + } + if (lt.ne.size() == 2) { + tensor = ggml_new_tensor_2d(ggml_ctx, lt.type, lt.ne.at(0), lt.ne.at(1)); + } else { + LLAMA_ASSERT(lt.ne.size() == 1); + tensor = ggml_new_tensor_1d(ggml_ctx, lt.type, lt.ne.at(0)); + } + ggml_set_name(tensor, lt.name.c_str()); + LLAMA_ASSERT( + lt.ggml_tensor == + NULL); // if this fails, we called get_tensor twice on the same tensor + + if (backend != GGML_BACKEND_CPU) { + ggml_set_no_alloc(ggml_ctx, use_mmap); + } + tensor->backend = backend; + lt.ggml_tensor = tensor; + num_ggml_tensors_created++; + return tensor; + } + + void done_getting_tensors() const { + if (num_ggml_tensors_created != tensors_map.tensors.size()) { + throw std::runtime_error( + std::string("falcon.cpp: file contained more tensors than expected")); + } + } + + void load_all_data(llama_progress_callback progress_callback, + void *progress_callback_user_data, llama_mlock *lmlock) { + size_t data_size = 0; + size_t prefetch_size = 0; + size_t lock_size = 0; + for (const llama_load_tensor < : tensors_map.tensors) { + data_size += lt.size; + if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) { + prefetch_size += lt.size; + } + } + + if (use_mmap) { + mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size)); + if (lmlock) { + lmlock->init(mapping->addr); + } + } + + size_t done_size = 0; + for (llama_load_tensor < : tensors_map.tensors) { + if (progress_callback) { + progress_callback((float)done_size / data_size, + progress_callback_user_data); + } + LLAMA_ASSERT(lt.ggml_tensor); // unused tensors should have been caught + // by load_data already + lt.data = (uint8_t *)lt.ggml_tensor->data; + + // allocate temp buffer if not using mmap + if (!use_mmap && lt.data == NULL) { + GGML_ASSERT(lt.ggml_tensor->backend != GGML_BACKEND_CPU); + lt.data = (uint8_t *)malloc(ggml_nbytes(lt.ggml_tensor)); + } + + load_data_for(lt); + + switch (lt.ggml_tensor->backend) { + case GGML_BACKEND_CPU: + lt.ggml_tensor->data = lt.data; + if (use_mmap && lmlock) { + lock_size += lt.size; + lmlock->grow_to(lock_size); + } + break; +#if defined(GGML_USE_CUBLAS) + case GGML_BACKEND_GPU: + case GGML_BACKEND_GPU_SPLIT: + ggml_cuda_transform_tensor(lt.data, lt.ggml_tensor); + if (!use_mmap) { + free(lt.data); + } + break; +#elif defined(GGML_USE_CLBLAST) + case GGML_BACKEND_GPU: + ggml_cl_transform_tensor(lt.data, lt.ggml_tensor); + if (!use_mmap) { + free(lt.data); + } + break; +#endif + default: + continue; + } + + done_size += lt.size; + } + } + + void load_data_for(llama_load_tensor <) { + if (use_mmap) { + LLAMA_ASSERT(lt.shards.size() == 1); + lt.data = (uint8_t *)mapping->addr + lt.shards.at(0).file_off; + } else if (lt.split_type == SPLIT_NONE) { + llama_file &file = file_loaders.at(lt.shards.at(0).file_idx)->file; + file.seek(lt.shards.at(0).file_off, SEEK_SET); + file.read_raw(lt.data, lt.size); + } else if (lt.split_type == SPLIT_BY_ROWS) { + size_t offset = 0; + for (llama_load_tensor_shard &shard : lt.shards) { + llama_file &file = file_loaders.at(shard.file_idx)->file; + file.seek(shard.file_off, SEEK_SET); + file.read_raw(lt.data + offset, shard.size); + offset += shard.size; + } + LLAMA_ASSERT(offset == lt.size); + } else if (lt.split_type == SPLIT_BY_COLUMNS) { + // Let's load the data into temporary buffers to ensure the OS performs + // large loads. + std::vector tmp_bufs(lt.shards.size()); + for (size_t i = 0; i < lt.shards.size(); i++) { + llama_load_tensor_shard &shard = lt.shards.at(i); + llama_file &file = file_loaders.at(shard.file_idx)->file; + file.seek(shard.file_off, SEEK_SET); + tmp_bufs.at(i).resize(shard.size); + file.read_raw(tmp_bufs.at(i).addr, shard.size); + } + // Then reshape. + size_t num_rows = lt.ne.at(1); + size_t per_shard_row_size = lt.shards.at(0).size / num_rows; + size_t out_offset = 0; + for (size_t row = 0; row < num_rows; row++) { + for (llama_buffer &tmp_buf : tmp_bufs) { + memcpy(lt.data + out_offset, tmp_buf.addr + row * per_shard_row_size, + per_shard_row_size); + out_offset += per_shard_row_size; + } + } + LLAMA_ASSERT(out_offset == lt.size); + } + if (0) { + print_checksum(lt); + } + } + + static void print_checksum(llama_load_tensor <) { + uint32_t sum = 0; + for (size_t i = 0; i < lt.size; i++) { + uint8_t byte = lt.data[i]; + sum = byte + (sum << 6) + (sum << 16) - sum; // sdbm hash + } + fprintf(stderr, "%s checksum: %#08x (%s, size %zu)\n", lt.name.c_str(), sum, + llama_format_tensor_shape(lt.ne).c_str(), lt.size); + } +}; + +// +// kv cache +// + +static bool kv_cache_init(const struct falcon_hparams &hparams, + struct falcon_kv_cache &cache, ggml_type wtype, + int n_ctx, int n_gpu_layers) { + const int64_t n_layer = hparams.n_layer; + const int64_t head_dim = hparams.n_embd / hparams.n_head; + const int64_t n_elements = + hparams.n_layer * n_ctx * head_dim * hparams.n_head_kv; + + cache.buf.resize(FALCON_MEM_REQ_KV_SELF(hparams, wtype, n_ctx)); + + struct ggml_init_params params; + params.mem_size = cache.buf.size; + params.mem_buffer = cache.buf.addr; + params.no_alloc = false; + + cache.ctx = ggml_init(params); + + if (!cache.ctx) { + fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__); + return false; + } + + cache.k = ggml_new_tensor_1d(cache.ctx, wtype, n_elements); + cache.v = ggml_new_tensor_1d(cache.ctx, wtype, n_elements); + ggml_set_name(cache.k, "cache_k"); + ggml_set_name(cache.v, "cache_v"); + + (void)n_gpu_layers; +#ifdef GGML_USE_CUBLAS + if (n_gpu_layers > n_layer + 1) { + ggml_cuda_assign_buffers_no_scratch(cache.k); + ggml_cuda_assign_buffers_no_scratch(cache.v); + } +#endif // GGML_USE_CUBLAS + + return true; +} + +struct falcon_context_params falcon_context_default_params() { + struct falcon_context_params result = { + /*.n_ctx =*/512, + /*.n_batch =*/512, + /*.n_gpu_layers =*/0, + /*.i_gpu_start =*/-1, + /*.i_gpu_last =*/-1, + /*.main_gpu =*/0, + /*.tensor_split =*/{0}, + /*.seed =*/-1, + /*.f16_kv =*/false, + /*.logits_all =*/false, + /*.vocab_only =*/false, + /*.use_mmap =*/true, + /*.use_mlock =*/false, + /*.embedding =*/false, + /*.progress_callback =*/nullptr, + /*.progress_callback_user_data =*/nullptr, + }; + + return result; +} + +struct falcon_model_quantize_params falcon_model_quantize_default_params() { + struct falcon_model_quantize_params result = { + /*.nthread =*/0, + /*.ftype =*/LLAMA_FTYPE_MOSTLY_Q5_1, + /*.allow_requantize =*/false, + /*.quantize_output_tensor =*/true, + }; + + return result; +} + +// +// model loading +// + +static const char *falcon_model_type_name(falcon_e_model type) { + switch (type) { + case FALCON_7B: + return "7B"; + case FALCON_40B: + return "40B"; + default: + LLAMA_ASSERT(false); + } +} + +// dynamically gets all tensors from a layer +std::vector get_tensors_from_layer(falcon_layer &layer) { + std::vector tensors; + ggml_tensor **tensor_ptr = reinterpret_cast( + &layer); // Cast to the pointer to ggml_tensor pointer + + // Iterate through the members and store their addresses in the vector + for (std::size_t i = 0; i < sizeof(falcon_layer) / sizeof(ggml_tensor *); + ++i) { + tensors.push_back(tensor_ptr[i]); + } + + return tensors; +} +// get vram size of all tensors in a layer (todo: split handling) +size_t calculate_layer_vram_bytes(const falcon_layer &layer) { + size_t size = 0; + auto tensors = get_tensors_from_layer(const_cast(layer)); + + // Add the size of each member with GPU backend + for (const auto &tensor : tensors) { + if (tensor != nullptr && tensor->backend != GGML_BACKEND_CPU) { + size += ggml_nbytes(tensor); + } + } + + return size; +} + +static void falcon_model_load_internal( + const std::string &fname, falcon_context &lctx, int n_ctx, int n_batch, + int n_gpu_layers, int main_gpu, const float *tensor_split, + ggml_type memory_type, bool use_mmap, bool use_mlock, bool vocab_only, + llama_progress_callback progress_callback, + void *progress_callback_user_data) { + lctx.t_start_us = ggml_time_us(); + + std::unique_ptr ml( + new falcon_model_loader(fname, use_mmap, vocab_only)); + + lctx.vocab = std::move(ml->file_loaders.at(0)->vocab); + auto &model = lctx.model; + model.hparams = ml->file_loaders.at(0)->hparams; + model.n_gpu_layers = n_gpu_layers; + + llama_file_version file_version = ml->file_loaders.at(0)->file_version; + auto &hparams = model.hparams; + + { + switch (hparams.n_layer) { + case 32: + model.type = falcon_e_model::FALCON_7B; + break; + case 60: + model.type = falcon_e_model::FALCON_40B; + break; + default: { + if (hparams.n_falcon_type == 7) { + model.type = falcon_e_model::FALCON_7B; + } else if (hparams.n_falcon_type == 40) { + model.type = falcon_e_model::FALCON_40B; + } else { + LLAMA_ASSERT(false); + } + } break; + } + + hparams.n_ctx = n_ctx; + } + + const uint32_t n_ff = 4 * model.hparams.n_embd; + + if (file_version < LLAMA_FILE_VERSION_GGJT_V2) { + if (hparams.ftype != LLAMA_FTYPE_ALL_F32 && + hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 && + hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) { + throw std::runtime_error( + format("this format is no longer supported (see " + "https://github.com/ggerganov/llama.cpp/pull/1405)")); + } + } + + if (file_version < LLAMA_FILE_VERSION_GGJT_V3) { + if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 || + hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 || + hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) { + throw std::runtime_error( + format("this format is no longer supported (see " + "https://github.com/ggerganov/llama.cpp/pull/1508)")); + } + } + + if (vocab_only) { + return; + } + + auto &ctx = model.ctx; + + size_t ctx_size; + size_t mmapped_size; + ml->calc_sizes(&ctx_size, &mmapped_size); + + // create the ggml context + { + lctx.model.buf.resize(ctx_size); + if (use_mlock) { + lctx.model.mlock_buf.init(lctx.model.buf.addr); + lctx.model.mlock_buf.grow_to(lctx.model.buf.size); + } + + struct ggml_init_params params = { + /*.mem_size =*/lctx.model.buf.size, + /*.mem_buffer =*/lctx.model.buf.addr, + /*.no_alloc =*/ml->use_mmap, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + throw std::runtime_error(format("ggml_init() failed")); + } + } + + (void)main_gpu; +#if defined(GGML_USE_CUBLAS) + ggml_cuda_set_main_device(main_gpu); +#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU +#define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU_SPLIT +#elif defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: using OpenCL for GPU acceleration\n", __func__); +#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU +#define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU +#else +#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CPU +#define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_CPU +#endif + + size_t vram_total = 0; + size_t vram_free = 0; + const size_t vram_reserved = + 512 * + MB; // that amount of VRAM is to stay free on GPU (headroom for other + // processes - may be reduced in pure server environments) + size_t vram_overhead = + 1250 * + MB; // this amount of vram is estimated for non weight storage buffers on + // VRAM (no big difference between 7B and 40B, needs to increase when + // more work is offloaded in the future) +#if defined(GGML_USE_CUBLAS) + // cublas is used in 32 bit mode, temporary cuda storage/conversion buffers + // are needed for batch ingestion ( could be run in 16 bit mode without + // performance downgrade and save half the VRAM) + if (model.type == FALCON_40B && n_batch > 1) { + vram_overhead += (1024 + 288 + 256) * MB; + } + if (model.type == FALCON_7B && n_batch > 1) { + vram_overhead += (315 + 80 + 78) * MB; + } + cudaMemGetInfo( + &vram_free, + &vram_total); // this should go in ggml-cuda.cu but I don't want to make + // Johannes life harder by modifying that yet +#endif + + // prepare memory for the weights + size_t vram_weights = 0; + size_t vram_scratch = 0; + + (void)vram_scratch; + (void)n_batch; + // calculate scratch buffer size and allocate it +#ifdef GGML_USE_CUBLAS + // vram_scratch = n_batch * MB; + vram_scratch = 0; // these are not used until we have multi operation support + ggml_cuda_set_scratch_size(vram_scratch); +#endif // GGML_USE_CUBLAS + + { + const uint32_t n_embd = hparams.n_embd; + const uint32_t n_head = hparams.n_head; + const uint32_t n_head_kv = hparams.n_head_kv; + const uint32_t n_layer = hparams.n_layer; + const uint32_t n_ff = 4 * model.hparams.n_embd; + const uint32_t n_vocab = hparams.n_vocab; + const uint32_t head_dim = hparams.n_embd / hparams.n_head; + + ml->ggml_ctx = ctx; + + model.tok_embeddings = ml->get_tensor("transformer.word_embeddings.weight", + {n_embd, n_vocab}, GGML_BACKEND_CPU); + + ggml_backend backend_norm; + ggml_backend backend_output; + // disabled norm/output offloading until further tests, causes silent crash + // at the moment + if (n_gpu_layers > int(n_layer) && false) { // NOLINT + backend_norm = LLAMA_BACKEND_OFFLOAD; + backend_output = LLAMA_BACKEND_OFFLOAD_SPLIT; + } else { + backend_norm = GGML_BACKEND_CPU; + backend_output = GGML_BACKEND_CPU; + } + + // "output" tensor + { + model.output_norm = + ml->get_tensor("transformer.ln_f.weight", {n_embd}, backend_norm); + model.output_norm_b = + ml->get_tensor("transformer.ln_f.bias", {n_embd}, backend_norm); + model.lm_head = + ml->get_tensor("lm_head.weight", {n_embd, n_vocab}, backend_output); + } + + if (backend_norm != GGML_BACKEND_CPU) { + vram_weights += + ggml_nbytes(model.output_norm) + ggml_nbytes(model.output_norm_b); + vram_free -= + ggml_nbytes(model.output_norm) + ggml_nbytes(model.output_norm_b); + } + if (backend_output != GGML_BACKEND_CPU) { + vram_weights += ggml_nbytes(model.lm_head); + vram_free -= ggml_nbytes(model.lm_head); + } + + int i_gpu_start = n_layer - n_gpu_layers; + if (i_gpu_start < 0) + i_gpu_start = 0; // n_gpu_layers can be larger than n_layer + + int i_gpu_last = n_layer; // allows to terminate the offloading earlier. + // TODO: instead do a proper calculation run and + // determine the start before the loop + model.i_gpu_start = i_gpu_start; + model.i_gpu_last = i_gpu_last; // if VRAM doesn't run out i_gpu_last is + // always the last layer + + model.layers.resize(n_layer); + for (uint32_t i = 0; i < n_layer; ++i) { + const ggml_backend backend = (int(i) < i_gpu_start || int(i) > i_gpu_last) + ? GGML_BACKEND_CPU + : LLAMA_BACKEND_OFFLOAD; // NOLINT + const ggml_backend backend_split = + (int(i) < i_gpu_start || int(i) > i_gpu_last) + ? GGML_BACKEND_CPU + : LLAMA_BACKEND_OFFLOAD_SPLIT; // NOLINT + + auto &layer = model.layers[i]; + + std::string layers_i = "layers." + std::to_string(i); + std::string str_i = std::to_string(i); + + if (model.type == FALCON_40B) { + layer.input_layernorm = + ml->get_tensor("transformer.h." + str_i + ".ln_mlp.weight", + {n_embd}, GGML_BACKEND_CPU); + layer.input_layernorm_b = + ml->get_tensor("transformer.h." + str_i + ".ln_mlp.bias", {n_embd}, + GGML_BACKEND_CPU); + layer.attention_norm = + ml->get_tensor("transformer.h." + str_i + ".ln_attn.weight", + {n_embd}, GGML_BACKEND_CPU); + layer.attention_norm_b = + ml->get_tensor("transformer.h." + str_i + ".ln_attn.bias", {n_embd}, + GGML_BACKEND_CPU); + } else // FALCON_7B + { + layer.input_layernorm = + ml->get_tensor("transformer.h." + str_i + ".input_layernorm.weight", + {n_embd}, backend); + layer.input_layernorm_b = + ml->get_tensor("transformer.h." + str_i + ".input_layernorm.bias", + {n_embd}, GGML_BACKEND_CPU); + } + + layer.query_key_value = ml->get_tensor( + "transformer.h." + str_i + ".self_attention.query_key_value.weight", + {n_embd, (n_head_kv * 2 + n_head) * head_dim}, backend_split); + layer.wo = ml->get_tensor( + "transformer.h." + str_i + ".self_attention.dense.weight", + {n_embd, n_embd}, backend_split); + + layer.ffn_up = + ml->get_tensor("transformer.h." + str_i + ".mlp.dense_h_to_4h.weight", + {n_embd, n_ff}, backend_split); // before gelu + layer.ffn_down = + ml->get_tensor("transformer.h." + str_i + ".mlp.dense_4h_to_h.weight", + {n_ff, n_embd}, backend_split); // after gelu + + if (backend != GGML_BACKEND_CPU) { + size_t vram_layer = 0; + vram_layer = calculate_layer_vram_bytes(layer); + vram_weights += vram_layer; + vram_free = + (vram_layer > vram_free) + ? 0 + : vram_free - + vram_layer; // simulate the layer being loaded in VRAM + // test if we have enough VRAM to offload the next layer + if (i < n_layer && vram_free <= (vram_overhead + vram_scratch + + vram_reserved + vram_layer)) { + fprintf(stderr, + "INFO: Not enough VRAM to load all requested layers - at " + "layer %d of %d: skipping\n", + i, n_layer); + model.n_gpu_layers = n_gpu_layers; + i_gpu_last = i; + model.i_gpu_last = i_gpu_last; + n_gpu_layers = i_gpu_last - i_gpu_start; + printf("INFO: %d layers will be offloaded to GPU (layers %d to %d)\n", + n_gpu_layers, i_gpu_start + 1, i_gpu_last + 1); + } + } + } + } + + ml->done_getting_tensors(); + + // print memory requirements + { + // this is the total memory required to run the inference + // TODO: this calculation is still wrong + int64_t mem_required = ctx_size + mmapped_size - + vram_weights + // weights in VRAM not in memory + FALCON_MEM_REQ_SCRATCH0().at(model.type) + + FALCON_MEM_REQ_SCRATCH1().at(model.type) + + FALCON_MEM_REQ_EVAL().at(model.type); + + if (mem_required < 0) mem_required = 0; + + // this is the memory required by one llama_state + const size_t mem_required_state = + FALCON_MEM_REQ_KV_SELF(model.hparams, memory_type, n_ctx); + + // moved scratch allocation of vram to top +#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) + const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer)); +#else + (void)n_gpu_layers; +#endif + } + + // populate `tensors_by_name` + for (llama_load_tensor < : ml->tensors_map.tensors) { + model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor); + } + + (void)tensor_split; +#if defined(GGML_USE_CUBLAS) + { ggml_cuda_set_tensor_split(tensor_split); } +#endif + + ml->load_all_data(progress_callback, progress_callback_user_data, + use_mlock ? &lctx.model.mlock_mmap : NULL); + + if (progress_callback) { + progress_callback(1.0f, progress_callback_user_data); + } + +#if defined(GGML_USE_CUBLAS) + // size_t vram_free_simulated = vram_free; + cudaMemGetInfo( + &vram_free, + &vram_total); // this should go in ggml-cuda.cu but I don't want to make + // Johannes life harder by modifying that yet +#endif + + model.mapping = std::move(ml->mapping); + + // loading time will be recalculate after the first eval, so + // we take page faults deferred by mmap() into consideration + lctx.t_load_us = ggml_time_us() - lctx.t_start_us; +} + +static bool falcon_model_load(const std::string &fname, falcon_context &lctx, + int n_ctx, int n_batch, int n_gpu_layers, + int main_gpu, float *tensor_split, + ggml_type memory_type, bool use_mmap, + bool use_mlock, bool vocab_only, + llama_progress_callback progress_callback, + void *progress_callback_user_data) { + try { + falcon_model_load_internal(fname, lctx, n_ctx, n_batch, n_gpu_layers, + main_gpu, tensor_split, memory_type, use_mmap, + use_mlock, vocab_only, progress_callback, + progress_callback_user_data); + return true; + } catch (const std::exception &err) { + fprintf(stderr, "error loading model: %s\n", err.what()); + return false; + } +} + +// evaluate the transformer +// +// - lctx: llama context +// - tokens: new batch of tokens to process +// - n_past: the context size so far +// - n_threads: number of threads to use +// - cgraph_fname: filename of the exported computation graph +// +static bool falcon_eval_internal(falcon_context &lctx, + const llama_token *tokens, const int n_tokens, + const int n_past, const int n_threads, + const char *cgraph_fname, int debug_timings) { + const int64_t t_start_us = ggml_time_us(); + + const int N = n_tokens; + // const int N = embd_inp.size(); + + const auto &model = lctx.model; + const auto &hparams = model.hparams; + + const auto &kv_self = model.kv_self; + + LLAMA_ASSERT(!!kv_self.ctx); + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_head = hparams.n_head; + const int n_head_kv = hparams.n_head_kv; + const int n_vocab = hparams.n_vocab; + const int n_falcon_type = hparams.n_falcon_type; + const int n_gpu_layers = model.n_gpu_layers; + const size_t head_dim = n_embd / n_head; // == n_rot in llama + + auto &mem_per_token = lctx.mem_per_token; + auto &buf_compute = lctx.buf_compute; + + struct ggml_init_params params = { + /*.mem_size =*/buf_compute.size, + /*.mem_buffer =*/buf_compute.addr, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + + // for big prompts, if BLAS is enabled, it is better to use only one thread + // otherwise, the threads are spin-lock waiting for the BLAS calls and are + // degrading the performance + ggml_cgraph gf = {}; + gf.n_threads = + N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + ggml_set_name(embd, "embd"); + memcpy(embd->data, tokens, N * ggml_element_size(embd)); + + struct ggml_tensor *cur; + struct ggml_tensor *inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd); + struct ggml_tensor *repeat_dummy = + ggml_new_tensor_3d(ctx0, inpL->type, head_dim, N + n_past, n_head); + + struct ggml_tensor *layernorm_output; + + ggml_type wtype = GGML_TYPE_F32; + // ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) + // (model.hparams.ftype)); + const int sizeof_wtype = ggml_type_sizef(wtype); + + // const int i_gpu_start = n_layer - n_gpu_layers; + const int i_gpu_start = lctx.model.i_gpu_start; + const int i_gpu_last = + lctx.model.i_gpu_last > 0 ? lctx.model.i_gpu_last : n_layer; + (void)i_gpu_start; + + // offload functions set the tensor output backend to GPU + // tensors are GPU-accelerated if any input or the output has been offloaded + // + // with the low VRAM option VRAM scratch is disabled in + // llama_load_model_internal in that case ggml_cuda_assign_buffers has no + // effect + offload_func_t offload_func_nr = llama_nop; // nr = non-repeating + offload_func_t offload_func_kqv = llama_nop; + +#ifdef GGML_USE_CUBLAS + // todo: use either a flag in model/params or a backend test to determine if + // norm/output are on GPU + if (n_gpu_layers > n_layer) { + offload_func_nr = ggml_cuda_assign_buffers; + } + if (n_gpu_layers > n_layer + 1) { + offload_func_kqv = ggml_cuda_assign_buffers; + } +#endif // GGML_USE_CUBLAS + + for (int il = 0; il < n_layer; ++il) { + offload_func_t offload_func = llama_nop; + +#ifdef GGML_USE_CUBLAS + if (il >= i_gpu_start && il < i_gpu_last) { + offload_func = + ggml_cuda_assign_buffers; // sets the output backend to GPU + } +#endif // GGML_USE_CUBLAS + + struct ggml_tensor *inpSA = inpL; + + lctx.use_buf(ctx0, 0); + + // self-attention + { + layernorm_output = ggml_norm(ctx0, inpL); + + ggml_tensor *il_a = + ggml_mul(ctx0, layernorm_output, model.layers[il].input_layernorm); + offload_func(il_a); // (todo: uses vram scratch) + + layernorm_output = + ggml_add(ctx0, il_a, + ggml_repeat(ctx0, model.layers[il].input_layernorm_b, + layernorm_output)); + offload_func(layernorm_output); + ggml_set_name(layernorm_output, "layernorm_output"); + + if (model.type == FALCON_40B || n_falcon_type == 40) { + cur = ggml_norm(ctx0, inpL); + + cur = ggml_add( + ctx0, + ggml_mul(ctx0, + ggml_repeat(ctx0, model.layers[il].attention_norm, cur), + cur), + ggml_repeat(ctx0, model.layers[il].attention_norm_b, cur)); + } else { + cur = layernorm_output; + } + + // compute QKV + + cur = ggml_mul_mat(ctx0, model.layers[il].query_key_value, cur); + // offload_func(cur); + + // Note that the strides for Kcur, Vcur are set up so that the + // resulting views are misaligned with the tensor's storage + // (by applying the K/V offset we shift the tensor's original + // view to stick out behind the viewed QKV tensor's allocated + // memory, so to say). This is ok because no actual accesses + // happen to that out-of-range memory, but it can require some + // trickery when trying to accurately dump these views for + // debugging. + + struct ggml_tensor *Qcur = + ggml_view_3d(ctx0, cur, head_dim, n_head, N, head_dim * sizeof_wtype, + head_dim * (n_head + 2 * n_head_kv) * sizeof_wtype, 0); + ggml_set_name(Qcur, "Qcur"); + + struct ggml_tensor *Kcur = ggml_view_3d( + ctx0, cur, head_dim, n_head_kv, N, head_dim * sizeof_wtype, + head_dim * (n_head + 2 * n_head_kv) * sizeof_wtype, + head_dim * n_head * sizeof_wtype); + ggml_set_name(Kcur, "Kcur"); + + struct ggml_tensor *Vcur = ggml_view_3d( + ctx0, cur, head_dim, n_head_kv, N, head_dim * sizeof_wtype, + head_dim * (n_head + 2 * n_head_kv) * sizeof_wtype, + head_dim * (n_head + n_head_kv) * sizeof_wtype); + ggml_set_name(Vcur, "Vcur"); + + // using mode = 2 for neox mode + Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, head_dim, 2); + Kcur = ggml_rope_inplace(ctx0, Kcur, n_past, head_dim, 2); + + // store key and value to memory + { + struct ggml_tensor *k = + ggml_view_1d(ctx0, kv_self.k, N * n_head_kv * head_dim, + (ggml_element_size(kv_self.k) * n_head_kv * head_dim) * + (il * n_ctx + n_past)); + ggml_set_name(k, "k"); + struct ggml_tensor *v = + ggml_view_1d(ctx0, kv_self.v, N * n_head_kv * head_dim, + (ggml_element_size(kv_self.v) * n_head_kv * head_dim) * + (il * n_ctx + n_past)); + ggml_set_name(v, "v"); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + struct ggml_tensor *K = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d(ctx0, kv_self.k, (n_past + N) * n_head_kv * head_dim, + il * n_ctx * ggml_element_size(kv_self.k) * + n_head_kv * head_dim), + head_dim, n_head_kv, n_past + N), + 0, 2, 1, 3); + + // K * Q + + K = ggml_cont(ctx0, ggml_repeat2(ctx0, K, repeat_dummy)); + ggml_set_name(K, "K"); + + struct ggml_tensor *Q = ggml_permute(ctx0, Qcur, 0, 2, 1, 3); + ggml_set_name(Q, "Q"); + struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q); + ggml_set_name(KQ, "KQ"); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + struct ggml_tensor *KQ_scaled = ggml_scale_inplace( + ctx0, KQ, ggml_new_f32(ctx0, 1.0f / sqrt(float(head_dim)))); + ggml_set_name(KQ_scaled, "KQ_scaled"); + + // KQ_masked = mask_past(KQ_scaled) + struct ggml_tensor *KQ_masked = + ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); + ggml_set_name(KQ_masked, "KQ_masked"); + + // KQ = soft_max(KQ_masked) + struct ggml_tensor *KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); + ggml_set_name(KQ_soft_max, "KQ_soft_max"); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, + // 3).contiguous() + struct ggml_tensor *V = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d(ctx0, kv_self.v, (n_past + N) * n_head_kv * head_dim, + il * n_ctx * ggml_element_size(model.kv_self.v) * + n_head_kv * head_dim), + head_dim, n_head_kv, n_past + N), + 0, 2, 1, 3); + + V = ggml_cont(ctx0, + ggml_transpose(ctx0, ggml_repeat2(ctx0, V, repeat_dummy))); + ggml_set_name(V, "V"); + + // KQV = transpose(V) * KQ_soft_max + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); + ggml_set_name(KQV, "KQV"); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + ggml_set_name(KQV_merged, "KQV_merged"); + + // cur = KQV_merged.contiguous().view(n_embd, N) + cur = ggml_cpy(ctx0, KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection + { + cur = ggml_mul_mat(ctx0, model.layers[il].wo, cur); + // offload_func(cur); + ggml_set_name(cur, "result_wo"); + } + } // end of attention + + lctx.use_buf(ctx0, 1); + // ggml_cuda_set_scratch(1); + + struct ggml_tensor *inpFF = layernorm_output; + ggml_set_name(inpFF, "inpFF"); + struct ggml_tensor *attn_out = + ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + // offload_func(attn_out); + ggml_set_name(attn_out, "attn_out"); + { + cur = ggml_mul_mat(ctx0, model.layers[il].ffn_up, inpFF); + // offload_func(cur); + ggml_set_name(cur, "inpFF*ff_up"); + cur = ggml_gelu(ctx0, cur); + // offload_func(cur); + cur = ggml_mul_mat(ctx0, model.layers[il].ffn_down, cur); + // offload_func(cur); + ggml_set_name(cur, "gelu_cur*ff_down"); + } + + cur = ggml_add(ctx0, cur, attn_out); + cur = ggml_add(ctx0, cur, inpL); + ggml_set_name(cur, "inpFF_+_result_attn_out"); + // input for next layer + inpL = cur; + } // end of layer loop + lctx.use_buf(ctx0, 0); + // ggml_cuda_set_scratch(0); + + // used at the end to optionally extract the embeddings + struct ggml_tensor *embeddings = NULL; + + offload_func_t offload_func = llama_nop; + +#ifdef GGML_USE_CUBLAS + if (n_gpu_layers > 0 && n_layer >= i_gpu_start && n_layer <= i_gpu_last) { + offload_func = ggml_cuda_assign_buffers; // sets the output backend to GPU + } +#endif // GGML_USE_CUBLAS + + // norm + { + cur = ggml_norm(ctx0, cur); + // offload_func(cur); + ggml_set_name(cur, "norm_cur"); + + // inpL = ln_f_g*inpL + ln_f_b + cur = ggml_add( + ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, model.output_norm, cur), cur), + ggml_repeat(ctx0, model.output_norm_b, cur)); + // offload_func(cur); + ggml_set_name(cur, "result_norm"); + + embeddings = cur; + } + + // language modelling head + cur = ggml_mul_mat(ctx0, model.lm_head, cur); + // offload_func(cur); + ggml_set_name(cur, "result_lm_head"); + + // cur = ggml_mul_mat(ctx0, model.output, cur); + // ggml_set_name(cur, "result_output"); + + lctx.use_buf(ctx0, -1); + + // logits -> probs + // cur = ggml_soft_max_inplace(ctx0, cur); + + // run the computation + ggml_build_forward_expand(&gf, cur); +#if 0 + // use to confirm vram_overhead is correct + size_t vram_total=0; + size_t vram_free=0; +#if defined(GGML_USE_CUBLAS) + cudaMemGetInfo(&vram_free, &vram_total); // this should go in ggml-cuda.cu but I don't want to make Johannes life harder by modifying that yet + fprintf(stderr, "\n%s: VRAM free: %7.2f MB of %7.2f MB (in use: %7.2f MB)\n", __func__, vram_free/MB*1.0, vram_total/MB*1.0, (vram_total-vram_free)/MB*1.0); +#endif +#endif + +#ifdef GGML_USE_METAL + if (lctx.ctx_metal && N == 1) { + ggml_metal_graph_compute(lctx.ctx_metal, &gf); + ggml_metal_get_tensor(lctx.ctx_metal, cur); + } else { + // IMPORTANT: + // Since we don't have efficient Matrix x Matrix Metal multiplication yet, + // we fallback to vanilla ggml_graph_compute(). It uses Apple's Accelerate + // CBLAS API which takes advantage of the ANE or the AMX coprocessor. + // + // When we implement Matrix x Matrix Metal multiplication, we can avoid this + // branch. But for now, we have focused only on Matrix x Vector Metal + // multiplication. + // + // TODO: avoid these syncs via shared memory (ref #1696) + // + if (lctx.ctx_metal) { + // We need to sync the GPU KV cache with the CPU KV cache + ggml_metal_get_tensor(lctx.ctx_metal, kv_self.k); + ggml_metal_get_tensor(lctx.ctx_metal, kv_self.v); + } + + ggml_graph_compute(ctx0, &gf); + } +#else + ggml_graph_compute(ctx0, &gf); +#endif + + if (cgraph_fname) { + ggml_graph_export(&gf, cgraph_fname); + } + + // plot the computation graph in dot format (for debugging purposes) + // if (n_past%100 == 0) { + // ggml_graph_dump_dot(&gf, NULL, "llama.dot"); + //} + + // embd_w.resize(n_vocab*N); + // memcpy(embd_w.data(), ggml_get_data(cur), sizeof(float)*n_vocab*N); + + // update kv token count + lctx.model.kv_self.n = n_past + N; + + // extract logits + { + auto &logits_out = lctx.logits; + + if (lctx.logits_all) { + logits_out.resize(n_vocab * N); + memcpy(logits_out.data(), (float *)ggml_get_data(cur), + sizeof(float) * n_vocab * N); + } else { + // return result for just the last token + logits_out.resize(n_vocab); + memcpy(logits_out.data(), + (float *)ggml_get_data(cur) + (n_vocab * (N - 1)), + sizeof(float) * n_vocab); + } + } + + // extract embeddings + if (!lctx.embedding.empty()) { + auto &embedding_out = lctx.embedding; + + embedding_out.resize(n_embd); + memcpy(embedding_out.data(), + (float *)ggml_get_data(embeddings) + (n_embd * (N - 1)), + sizeof(float) * n_embd); + } + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0) / N; + } + +#if 0 + printf("\n%s: used_mem = %.3f MB, scratch -- %.3f MB %.3f MB\n", __func__, + ggml_used_mem(ctx0)/1024.0/1024.0, + lctx.get_buf_max_mem(0)/1024.0/1024.0, + lctx.get_buf_max_mem(1)/1024.0/1024.0); +#endif + + ggml_free(ctx0); + + // measure the performance only for the single-token evals + if (N == 1) { + lctx.t_eval_us += ggml_time_us() - t_start_us; + lctx.n_eval++; + } else if (N > 1) { + lctx.t_p_eval_us += ggml_time_us() - t_start_us; + lctx.n_p_eval += N; + } + + return true; +} + +// +// tokenizer +// + +static std::vector falcon_tokenize(const llama_vocab &vocab, + const std::string &text, + bool bos) { + llama_tokenizer tokenizer(vocab); + std::vector output; + + if (text.empty()) { + return output; + } + + if (bos) { + output.push_back(falcon_token_bos()); + } + + tokenizer.tokenize(text, output); + return output; +} + +// +// sampling +// + +void falcon_sample_softmax(struct falcon_context *ctx, + llama_token_data_array *candidates) { + assert(candidates->size > 0); + + const int64_t t_start_sample_us = ggml_time_us(); + + // Sort the logits in descending order + if (!candidates->sorted) { + std::sort(candidates->data, candidates->data + candidates->size, + [](const llama_token_data &a, const llama_token_data &b) { + return a.logit > b.logit; + }); + candidates->sorted = true; + } + + float max_l = candidates->data[0].logit; + float cum_sum = 0.0f; + for (size_t i = 0; i < candidates->size; ++i) { + float p = expf(candidates->data[i].logit - max_l); + candidates->data[i].p = p; + cum_sum += p; + } + for (size_t i = 0; i < candidates->size; ++i) { + candidates->data[i].p /= cum_sum; + } + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void falcon_sample_top_k(struct falcon_context *ctx, + llama_token_data_array *candidates, int k, + size_t min_keep) { + const int64_t t_start_sample_us = ggml_time_us(); + + k = std::max(k, (int)min_keep); + k = std::min(k, (int)candidates->size); + + // Sort scores in descending order + if (!candidates->sorted) { + auto comp = [](const llama_token_data &a, const llama_token_data &b) { + return a.logit > b.logit; + }; + if (k == (int)candidates->size) { + std::sort(candidates->data, candidates->data + candidates->size, comp); + } else { + std::partial_sort(candidates->data, candidates->data + k, + candidates->data + candidates->size, comp); + } + candidates->sorted = true; + } + candidates->size = k; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void falcon_sample_top_p(struct falcon_context *ctx, + llama_token_data_array *candidates, float p, + size_t min_keep) { + if (p >= 1.0f) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + falcon_sample_softmax(ctx, candidates); + + // Compute the cumulative probabilities + float cum_sum = 0.0f; + size_t last_idx = candidates->size; + + for (size_t i = 0; i < candidates->size; ++i) { + cum_sum += candidates->data[i].p; + + // Check if the running sum is greater than p or if we have kept at least + // min_keep tokens + if (cum_sum > p && i >= min_keep) { + last_idx = i; + break; + } + } + + // Resize the output vector to keep only the top-p tokens + candidates->size = last_idx; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void falcon_sample_tail_free(struct falcon_context *ctx, + llama_token_data_array *candidates, float z, + size_t min_keep) { + if (z >= 1.0f || candidates->size <= 2) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + falcon_sample_softmax(nullptr, candidates); + + // Compute the first and second derivatives + std::vector first_derivatives(candidates->size - 1); + std::vector second_derivatives(candidates->size - 2); + + for (size_t i = 0; i < first_derivatives.size(); ++i) { + first_derivatives[i] = candidates->data[i].p - candidates->data[i + 1].p; + } + for (size_t i = 0; i < second_derivatives.size(); ++i) { + second_derivatives[i] = first_derivatives[i] - first_derivatives[i + 1]; + } + + // Calculate absolute value of second derivatives + for (size_t i = 0; i < second_derivatives.size(); ++i) { + second_derivatives[i] = abs(second_derivatives[i]); + } + + // Normalize the second derivatives + float second_derivatives_sum = std::accumulate( + second_derivatives.begin(), second_derivatives.end(), 0.0f); + for (float &value : second_derivatives) { + value /= second_derivatives_sum; + } + + float cum_sum = 0.0f; + size_t last_idx = candidates->size; + for (size_t i = 0; i < second_derivatives.size(); ++i) { + cum_sum += second_derivatives[i]; + + // Check if the running sum is greater than z or if we have kept at least + // min_keep tokens + if (cum_sum > z && i >= min_keep) { + last_idx = i; + break; + } + } + + // Resize the output vector to keep only the tokens above the tail location + candidates->size = last_idx; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void falcon_sample_typical(struct falcon_context *ctx, + llama_token_data_array *candidates, float p, + size_t min_keep) { + // Reference implementation: + // https://github.com/huggingface/transformers/compare/main...cimeister:typical-sampling:typical-pr + if (p >= 1.0f) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + // Compute the softmax of logits and calculate entropy + falcon_sample_softmax(nullptr, candidates); + + float entropy = 0.0f; + for (size_t i = 0; i < candidates->size; ++i) { + entropy += -candidates->data[i].p * logf(candidates->data[i].p); + } + + // Compute the absolute difference between negative log probability and + // entropy for each candidate + std::vector shifted_scores; + for (size_t i = 0; i < candidates->size; ++i) { + float shifted_score = fabsf(-logf(candidates->data[i].p) - entropy); + shifted_scores.push_back(shifted_score); + } + + // Sort tokens based on the shifted_scores and their corresponding indices + std::vector indices(candidates->size); + std::iota(indices.begin(), indices.end(), 0); + + std::sort(indices.begin(), indices.end(), [&](size_t a, size_t b) { + return shifted_scores[a] < shifted_scores[b]; + }); + + // Compute the cumulative probabilities + float cum_sum = 0.0f; + size_t last_idx = indices.size(); + + for (size_t i = 0; i < indices.size(); ++i) { + size_t idx = indices[i]; + cum_sum += candidates->data[idx].p; + + // Check if the running sum is greater than typical or if we have kept at + // least min_keep tokens + if (cum_sum > p && i >= min_keep - 1) { + last_idx = i + 1; + break; + } + } + + // Resize the output vector to keep only the locally typical tokens + std::vector new_candidates; + for (size_t i = 0; i < last_idx; ++i) { + size_t idx = indices[i]; + new_candidates.push_back(candidates->data[idx]); + } + + // Replace the data in candidates with the new_candidates data + std::copy(new_candidates.begin(), new_candidates.end(), candidates->data); + candidates->size = new_candidates.size(); + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void falcon_sample_temperature(struct falcon_context *ctx, + llama_token_data_array *candidates_p, + float temp) { + const int64_t t_start_sample_us = ggml_time_us(); + + for (size_t i = 0; i < candidates_p->size; ++i) { + candidates_p->data[i].logit /= temp; + } + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void falcon_sample_repetition_penalty(struct falcon_context *ctx, + llama_token_data_array *candidates, + const llama_token *last_tokens, + size_t last_tokens_size, float penalty) { + if (last_tokens_size == 0 || penalty == 1.0f) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + for (size_t i = 0; i < candidates->size; ++i) { + const auto *token_iter = std::find( + last_tokens, last_tokens + last_tokens_size, candidates->data[i].id); + if (token_iter == last_tokens + last_tokens_size) { + continue; + } + + // The academic publication that described this technique actually just only + // divided, but that would cause tokens with negative logits to become more + // likely, which is obviously wrong. This is common fix for this problem, + // which is to multiply by the penalty instead of dividing. + if (candidates->data[i].logit <= 0) { + candidates->data[i].logit *= penalty; + } else { + candidates->data[i].logit /= penalty; + } + } + + candidates->sorted = false; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void falcon_sample_frequency_and_presence_penalties( + struct falcon_context *ctx, llama_token_data_array *candidates, + const llama_token *last_tokens_p, size_t last_tokens_size, + float alpha_frequency, float alpha_presence) { + if (last_tokens_size == 0 || + (alpha_frequency == 0.0f && alpha_presence == 0.0f)) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + // Create a frequency map to count occurrences of each token in last_tokens + std::unordered_map token_count; + for (size_t i = 0; i < last_tokens_size; ++i) { + token_count[last_tokens_p[i]]++; + } + + // Apply frequency and presence penalties to the candidates + for (size_t i = 0; i < candidates->size; ++i) { + auto token_iter = token_count.find(candidates->data[i].id); + if (token_iter == token_count.end()) { + continue; + } + + int count = token_iter->second; + candidates->data[i].logit -= + float(count) * alpha_frequency + float(count > 0) * alpha_presence; + } + + candidates->sorted = false; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +llama_token falcon_sample_token_mirostat(struct falcon_context *ctx, + llama_token_data_array *candidates, + float tau, float eta, int m, + float *mu) { + assert(ctx); + auto N = float(falcon_n_vocab(ctx)); + int64_t t_start_sample_us; + t_start_sample_us = ggml_time_us(); + + falcon_sample_softmax(nullptr, candidates); + + // Estimate s_hat using the most probable m tokens + float s_hat = 0.0; + float sum_ti_bi = 0.0; + float sum_ti_sq = 0.0; + for (size_t i = 0; i < size_t(m - 1) && i < candidates->size - 1; ++i) { + float t_i = logf(float(i + 2) / float(i + 1)); + float b_i = logf(candidates->data[i].p / candidates->data[i + 1].p); + sum_ti_bi += t_i * b_i; + sum_ti_sq += t_i * t_i; + } + s_hat = sum_ti_bi / sum_ti_sq; + + // Compute k from the estimated s_hat and target surprise value + float epsilon_hat = s_hat - 1; + float k = powf((epsilon_hat * powf(2, *mu)) / (1 - powf(N, -epsilon_hat)), + 1 / s_hat); + + // Sample the next word X using top-k sampling + falcon_sample_top_k(nullptr, candidates, int(k), 1); + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } + llama_token X = falcon_sample_token(ctx, candidates); + t_start_sample_us = ggml_time_us(); + + // Compute error as the difference between observed surprise and target + // surprise value + size_t X_idx = std::distance( + candidates->data, + std::find_if(candidates->data, candidates->data + candidates->size, + [&](const llama_token_data &candidate) { + return candidate.id == X; + })); + float observed_surprise = -log2f(candidates->data[X_idx].p); + float e = observed_surprise - tau; + + // Update mu using the learning rate and error + *mu = *mu - eta * e; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + ctx->n_sample++; + } + return X; +} + +llama_token falcon_sample_token_mirostat_v2(struct falcon_context *ctx, + llama_token_data_array *candidates, + float tau, float eta, float *mu) { + assert(ctx); + int64_t t_start_sample_us; + t_start_sample_us = ggml_time_us(); + + falcon_sample_softmax(ctx, candidates); + + // Truncate the words with surprise values greater than mu + candidates->size = std::distance( + candidates->data, + std::find_if(candidates->data, candidates->data + candidates->size, + [&](const llama_token_data &candidate) { + return -log2f(candidate.p) > *mu; + })); + + if (candidates->size == 0) { + candidates->size = 1; + } + + // Normalize the probabilities of the remaining words + falcon_sample_softmax(ctx, candidates); + + // Sample the next word X from the remaining words + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } + llama_token X = falcon_sample_token(ctx, candidates); + t_start_sample_us = ggml_time_us(); + + // Compute error as the difference between observed surprise and target + // surprise value + size_t X_idx = std::distance( + candidates->data, + std::find_if(candidates->data, candidates->data + candidates->size, + [&](const llama_token_data &candidate) { + return candidate.id == X; + })); + float observed_surprise = -log2f(candidates->data[X_idx].p); + float e = observed_surprise - tau; + + // Update mu using the learning rate and error + *mu = *mu - eta * e; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } + return X; +} + +llama_token falcon_sample_token_greedy(struct falcon_context *ctx, + llama_token_data_array *candidates) { + const int64_t t_start_sample_us = ggml_time_us(); + + // Find max element + auto *max_iter = std::max_element( + candidates->data, candidates->data + candidates->size, + [](const llama_token_data &a, const llama_token_data &b) { + return a.logit < b.logit; + }); + + llama_token result = max_iter->id; + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + ctx->n_sample++; + } + return result; +} + +llama_token falcon_sample_token(struct falcon_context *ctx, + llama_token_data_array *candidates) { + assert(ctx); + const int64_t t_start_sample_us = ggml_time_us(); + falcon_sample_softmax(nullptr, candidates); + + std::vector probs; + probs.reserve(candidates->size); + for (size_t i = 0; i < candidates->size; ++i) { + probs.push_back(candidates->data[i].p); + } + + std::discrete_distribution<> dist(probs.begin(), probs.end()); + auto &rng = ctx->rng; + int idx = dist(rng); + + llama_token result = candidates->data[idx].id; + + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + ctx->n_sample++; + return result; +} + +// +// quantization +// + +static void falcon_model_quantize_internal( + const std::string &fname_inp, const std::string &fname_out, + const falcon_model_quantize_params *params) { + ggml_type quantized_type; + llama_ftype ftype = params->ftype; + int nthread = params->nthread; + + switch (params->ftype) { + case LLAMA_FTYPE_MOSTLY_Q4_0: + quantized_type = GGML_TYPE_Q4_0; + break; + case LLAMA_FTYPE_MOSTLY_Q4_1: + quantized_type = GGML_TYPE_Q4_1; + break; + case LLAMA_FTYPE_MOSTLY_Q5_0: + quantized_type = GGML_TYPE_Q5_0; + break; + case LLAMA_FTYPE_MOSTLY_Q5_1: + quantized_type = GGML_TYPE_Q5_1; + break; + case LLAMA_FTYPE_MOSTLY_Q8_0: + quantized_type = GGML_TYPE_Q8_0; + break; + case LLAMA_FTYPE_MOSTLY_F16: + quantized_type = GGML_TYPE_F16; + break; + case LLAMA_FTYPE_ALL_F32: + quantized_type = GGML_TYPE_F32; + break; + +#ifdef GGML_USE_K_QUANTS + // K-quants + case LLAMA_FTYPE_MOSTLY_Q2_K: + quantized_type = GGML_TYPE_Q2_K; + break; + case LLAMA_FTYPE_MOSTLY_Q3_K_S: + case LLAMA_FTYPE_MOSTLY_Q3_K_M: + case LLAMA_FTYPE_MOSTLY_Q3_K_L: + quantized_type = GGML_TYPE_Q3_K; + break; + case LLAMA_FTYPE_MOSTLY_Q4_K_S: + case LLAMA_FTYPE_MOSTLY_Q4_K_M: + quantized_type = GGML_TYPE_Q4_K; + break; + case LLAMA_FTYPE_MOSTLY_Q5_K_S: + case LLAMA_FTYPE_MOSTLY_Q5_K_M: + quantized_type = GGML_TYPE_Q5_K; + break; + case LLAMA_FTYPE_MOSTLY_Q6_K: + quantized_type = GGML_TYPE_Q6_K; + break; +#endif + default: + throw std::runtime_error(format("invalid output file type %d\n", ftype)); + } + + if (nthread <= 0) { + nthread = std::thread::hardware_concurrency(); + } + + std::unique_ptr model_loader( + new falcon_model_loader(fname_inp, /*use_mmap*/ false, + /*vocab_only*/ false)); + falcon_file_saver file_saver( + fname_out.c_str(), model_loader->file_loaders.at(0).get(), params->ftype); + +#ifdef GGML_USE_K_QUANTS + int n_attention_wv = 0; + int n_feed_forward_w2 = 0; + for (auto &tensor : model_loader->tensors_map.tensors) { + if (tensor.name.find("attention.wv.weight") != std::string::npos) { + ++n_attention_wv; + } else if (tensor.name.find("feed_forward.w2.weight") != + std::string::npos) { + ++n_feed_forward_w2; + } + } + + int i_attention_wv = 0; + int i_feed_forward_w2 = 0; +#endif + + size_t total_size_org = 0; + size_t total_size_new = 0; + std::vector hist_all(1 << 4, 0); + + std::vector workers; + std::mutex mutex; + + size_t idx = 0; + for (llama_load_tensor &tensor : model_loader->tensors_map.tensors) { + llama_buffer read_data; + read_data.resize(tensor.size); + tensor.data = read_data.addr; + model_loader->load_data_for(tensor); + + printf("[%4zu/%4zu] %36s - %16s, type = %6s, ", ++idx, + model_loader->tensors_map.tensors.size(), tensor.name.c_str(), + llama_format_tensor_shape(tensor.ne).c_str(), + ggml_type_name(tensor.type)); + + // This used to be a regex, but has an extreme cost to compile + // times. + bool quantize = tensor.name.rfind("weight") == + tensor.name.size() - 6; // ends with 'weight'? + + // quantize only 2D tensors + quantize &= (tensor.ne.size() == 2); + quantize &= + params->quantize_output_tensor || tensor.name != "output.weight"; + quantize &= quantized_type != tensor.type; + + enum ggml_type new_type; + void *new_data; + size_t new_size; + llama_buffer work; + + if (!quantize) { + new_type = tensor.type; + new_data = tensor.data; + new_size = tensor.size; + printf("(Not quantizing) size = %8.3f MB\n", + tensor.size / 1024.0 / 1024.0); + } else { + new_type = quantized_type; +#ifdef GGML_USE_K_QUANTS + if (quantized_type == GGML_TYPE_Q2_K || + quantized_type == GGML_TYPE_Q3_K || + quantized_type == GGML_TYPE_Q4_K || + quantized_type == GGML_TYPE_Q5_K || + quantized_type == GGML_TYPE_Q6_K) { + int nx = tensor.ne.at(0); + int ny = tensor.ne.at(0); + if (nx % QK_K != 0 || ny % QK_K != 0) { + fprintf(stderr, + "\n\n========================= Tensor sizes %d x %d are not " + "divisible by %d\n", + nx, ny, QK_K); + fprintf(stderr, + "This is required to be able to use k-quants for now!\n"); + fprintf(stderr, + "============================================================" + "============================\n\n"); + throw std::runtime_error("Unsupported tensor size encountered\n"); + } + } + // if (tensor.name == ".mlp.dense_") { + // new_type = GGML_TYPE_Q6_K; + // } + // TODO falcon + +#endif + + float *f32_data; + size_t nelements = tensor.ne.at(0) * tensor.ne.at(1); + llama_buffer f32_conv_buf; + + if (tensor.type == GGML_TYPE_F32) { + f32_data = (float *)tensor.data; + } else if (ggml_is_quantized(tensor.type) && !params->allow_requantize) { + throw std::runtime_error(format("requantizing from type %s is disabled", + ggml_type_name(tensor.type))); + } else { + llama_convert_tensor_internal(tensor, f32_conv_buf, nelements, nthread); + f32_data = (float *)f32_conv_buf.addr; + } + + printf("quantizing .. "); + fflush(stdout); + + work.resize(nelements * 4); // upper bound on size + new_data = work.addr; + std::vector hist_cur(1 << 4, 0); + + int chunk_size = 32 * 512; + const int nchunk = (nelements + chunk_size - 1) / chunk_size; + const int nthread_use = + nthread > 1 ? std::max(1, std::min(nthread, nchunk)) : 1; + if (nthread_use < 2) { + new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, + nelements, hist_cur.data()); + } else { + size_t counter = 0; + new_size = 0; + auto compute = [&mutex, &counter, &hist_cur, &new_size, new_type, + f32_data, new_data, nelements, chunk_size]() { + std::vector local_hist; + size_t local_size = 0; + while (true) { + std::unique_lock lock(mutex); + size_t first = counter; + counter += chunk_size; + if (first >= nelements) { + if (!local_hist.empty()) { + for (int j = 0; j < int(local_hist.size()); ++j) { + hist_cur[j] += local_hist[j]; + } + new_size += local_size; + } + break; + } + lock.unlock(); + size_t last = std::min(nelements, first + chunk_size); + if (local_hist.empty()) { + local_hist.resize(hist_cur.size(), 0); + } + local_size += + ggml_quantize_chunk(new_type, f32_data, new_data, first, + last - first, local_hist.data()); + } + }; + if ((int)workers.size() < nthread_use - 1) { + workers.resize(nthread_use - 1); + } + for (int it = 0; it < nthread_use - 1; ++it) { + workers[it] = std::thread(compute); + } + compute(); + for (int it = 0; it < nthread_use - 1; ++it) { + workers[it].join(); + } + } + + printf("size = %8.2f MB -> %8.2f MB | hist: ", + tensor.size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0); + int64_t tot_count = 0; + for (size_t i = 0; i < hist_cur.size(); i++) { + hist_all[i] += hist_cur[i]; + tot_count += hist_cur[i]; + } + + if (tot_count > 0) { + for (size_t i = 0; i < hist_cur.size(); i++) { + printf("%5.3f ", hist_cur[i] / float(nelements)); + } + } + printf("\n"); + } + total_size_org += tensor.size; + total_size_new += new_size; + file_saver.write_tensor(tensor, new_type, new_data, new_size); + } + + printf("%s: model size = %8.2f MB\n", __func__, + total_size_org / 1024.0 / 1024.0); + printf("%s: quant size = %8.2f MB\n", __func__, + total_size_new / 1024.0 / 1024.0); + + { + int64_t sum_all = 0; + for (size_t i = 0; i < hist_all.size(); i++) { + sum_all += hist_all[i]; + } + + if (sum_all > 0) { + printf("%s: hist: ", __func__); + for (size_t i = 0; i < hist_all.size(); i++) { + printf("%5.3f ", hist_all[i] / float(sum_all)); + } + printf("\n"); + } + } +} + +// +// interface implementation +// + +struct falcon_context *falcon_init_from_file( + const char *path_model, struct falcon_context_params params) { + ggml_time_init(); + + falcon_context *ctx = new falcon_context; + + if (params.seed < 0) { + params.seed = time(NULL); + } + + unsigned cur_percentage = 0; + if (params.progress_callback == NULL) { + params.progress_callback_user_data = &cur_percentage; + params.progress_callback = [](float progress, void *ctx) { + unsigned *cur_percentage_p = (unsigned *)ctx; + unsigned percentage = (unsigned)(100 * progress); + while (percentage > *cur_percentage_p) { + *cur_percentage_p = percentage; + } + }; + } + + ctx->rng = std::mt19937(params.seed); + ctx->logits_all = params.logits_all; + + ggml_type memory_type = params.f16_kv ? GGML_TYPE_F16 : GGML_TYPE_F32; + + if (!falcon_model_load( + path_model, *ctx, params.n_ctx, params.n_batch, params.n_gpu_layers, + params.main_gpu, params.tensor_split, memory_type, params.use_mmap, + params.use_mlock, params.vocab_only, params.progress_callback, + params.progress_callback_user_data)) { + fprintf(stderr, "%s: failed to load model\n", __func__); + falcon_free(ctx); + return nullptr; + } + // model_load_internal() may change this if VRAM runs out + params.n_gpu_layers = ctx->model.n_gpu_layers; + params.i_gpu_start = + ctx->model.i_gpu_start; // first layer that's GPU accelerated + params.i_gpu_last = + ctx->model.i_gpu_last; // last layer that's GPU accelerated + + // reserve memory for context buffers + if (!params.vocab_only) { + if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, + ctx->model.hparams.n_ctx, params.n_gpu_layers)) { + fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", + __func__); + falcon_free(ctx); + return nullptr; + } + + const auto &hparams = ctx->model.hparams; + + // resized during inference + if (params.logits_all) { + ctx->logits.reserve(hparams.n_ctx * hparams.n_vocab); + } else { + ctx->logits.reserve(hparams.n_vocab); + } + + if (params.embedding) { + ctx->embedding.resize(hparams.n_embd); + } + + ctx->buf_compute.resize(FALCON_MEM_REQ_EVAL().at(ctx->model.type)); + + ctx->buf_scratch[0].resize(FALCON_MEM_REQ_SCRATCH0().at(ctx->model.type)); + ctx->buf_scratch[1].resize(FALCON_MEM_REQ_SCRATCH1().at(ctx->model.type)); + } + +#ifdef GGML_USE_METAL + if (params.n_gpu_layers > 0) { + // this allocates all Metal resources and memory buffers + ctx->ctx_metal = ggml_metal_init(); + + void *data_ptr = NULL; + size_t data_size = 0; + if (params.use_mmap) { + data_ptr = ctx->model.mapping->addr; + data_size = ctx->model.mapping->size; + } else { + data_ptr = ggml_get_mem_buffer(ctx->model.ctx); + data_size = ggml_get_mem_size(ctx->model.ctx); + } + +#define LLAMA_METAL_CHECK_BUF(result) \ + if (!(result)) { \ + fprintf(stderr, "%s: failed to add buffer\n", __func__); \ + falcon_free(ctx); \ + return NULL; \ + } + + LLAMA_METAL_CHECK_BUF( + ggml_metal_add_buffer(ctx->ctx_metal, "data", data_ptr, data_size)); + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer( + ctx->ctx_metal, "eval", ctx->buf_compute.addr, ctx->buf_compute.size)); + + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "kv", + ctx->model.kv_self.buf.addr, + ctx->model.kv_self.buf.size)); + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr0", + ctx->buf_scratch[0].addr, + ctx->buf_scratch[0].size)); + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr1", + ctx->buf_scratch[1].addr, + ctx->buf_scratch[1].size)); +#undef LLAMA_METAL_CHECK_BUF + } +#endif + + return ctx; +} + +void falcon_free(struct falcon_context *ctx) { delete ctx; } + +int falcon_model_quantize(const char *fname_inp, const char *fname_out, + const falcon_model_quantize_params *params) { + try { + falcon_model_quantize_internal(fname_inp, fname_out, params); + return 0; + } catch (const std::exception &err) { + fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.what()); + return 1; + } +} + +int falcon_apply_lora_from_file_internal(struct falcon_context *ctx, + const char *path_lora, + const char *path_base_model, + int n_threads) { + fprintf(stderr, "%s: applying lora adapter from '%s' - please wait ...\n", + __func__, path_lora); + + auto &model = ctx->model; + + const int64_t t_start_lora_us = ggml_time_us(); + + auto fin = std::ifstream(path_lora, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, path_lora); + return 1; + } + + // verify magic and version + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != LLAMA_FILE_MAGIC_GGLA) { + fprintf(stderr, "%s: bad file magic\n", __func__); + return 1; + } + uint32_t format_version; + fin.read((char *)&format_version, sizeof(format_version)); + + if (format_version != 1) { + fprintf(stderr, "%s: unsupported file version\n", __func__); + return 1; + } + } + + int32_t lora_r; + int32_t lora_alpha; + fin.read((char *)&lora_r, sizeof(lora_r)); + fin.read((char *)&lora_alpha, sizeof(lora_alpha)); + float scaling = (float)lora_alpha / (float)lora_r; + + fprintf(stderr, "%s: r = %d, alpha = %d, scaling = %.2f\n", __func__, lora_r, + lora_alpha, scaling); + + // create a temporary ggml context to store the lora tensors + // todo: calculate size from biggest possible tensor + std::vector lora_buf(1024ull * 1024ull * 1024ull); + struct ggml_init_params params; + params.mem_size = lora_buf.size(); + params.mem_buffer = lora_buf.data(); + params.no_alloc = false; + + ggml_context *lora_ctx = ggml_init(params); + std::unordered_map lora_tensors; + + // create a name -> tensor map of the model to accelerate lookups + std::unordered_map model_tensors; + for (auto &kv : model.tensors_by_name) { + model_tensors.insert(kv); + } + + // load base model + std::unique_ptr model_loader; + ggml_context *base_ctx = NULL; + llama_buffer base_buf; + if (path_base_model) { + fprintf(stderr, "%s: loading base model from '%s'\n", __func__, + path_base_model); + model_loader.reset(new falcon_model_loader( + path_base_model, /*use_mmap*/ true, /*vocab_only*/ false)); + + size_t ctx_size; + size_t mmapped_size; + model_loader->calc_sizes(&ctx_size, &mmapped_size); + base_buf.resize(ctx_size); + + ggml_init_params base_params; + base_params.mem_size = base_buf.size; + base_params.mem_buffer = base_buf.addr; + base_params.no_alloc = model_loader->use_mmap; + + base_ctx = ggml_init(base_params); + + model_loader->ggml_ctx = base_ctx; + + // maybe this should in falcon_model_loader + if (model_loader->use_mmap) { + model_loader->mapping.reset(new llama_mmap( + &model_loader->file_loaders.at(0)->file, /* prefetch */ 0)); + } + } + + // read tensors and apply + bool warned = false; + int n_tensors = 0; + while (true) { + int32_t n_dims; + int32_t length; + int32_t ftype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ftype), sizeof(ftype)); + if (fin.eof()) { + break; + } + + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + } + + std::string name; + { + char buf[1024]; + fin.read(buf, length); + name = std::string(buf, length); + } + + // check for lora suffix and get the type of tensor + const std::string lora_suffix = ".lora"; + size_t pos = name.rfind(lora_suffix); + if (pos == std::string::npos) { + fprintf(stderr, "%s: error: '%s' is not a lora tensor\n", __func__, + name.c_str()); + return 1; + } + + std::string lora_type = name.substr(pos + lora_suffix.length()); + std::string base_name = name; + base_name.erase(pos); + // fprintf(stderr, "%s: %s => %s (lora type %s) ", __func__, + // name.c_str(),base_name.c_str(), lora_type.c_str()); + + if (model_tensors.find(base_name) == model_tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in lora adapter\n", __func__, + name.data()); + return 1; + } + + // create ggml tensor + ggml_type wtype; + switch (ftype) { + case 0: + wtype = GGML_TYPE_F32; + break; + case 1: + wtype = GGML_TYPE_F16; + break; + default: { + fprintf(stderr, "%s: invalid tensor data type '%d'\n", __func__, ftype); + return false; + } + } + ggml_tensor *lora_tensor; + if (n_dims == 2) { + lora_tensor = ggml_new_tensor_2d(lora_ctx, wtype, ne[0], ne[1]); + } else { + fprintf(stderr, "%s: unsupported tensor dimension %d\n", __func__, + n_dims); + return 1; + } + + // load tensor data + size_t offset = fin.tellg(); + size_t tensor_data_size = ggml_nbytes(lora_tensor); + offset = (offset + 31) & -32; + fin.seekg(offset); + fin.read((char *)lora_tensor->data, tensor_data_size); + + lora_tensors[name] = lora_tensor; + + // check if we have both A and B tensors and apply + if (lora_tensors.find(base_name + ".loraA") != lora_tensors.end() && + lora_tensors.find(base_name + ".loraB") != lora_tensors.end()) { + ggml_tensor *dest_t = model_tensors[base_name]; + ggml_tensor *base_t; + if (model_loader) { + // load from base model + if (model_loader->tensors_map.name_to_idx.find(base_name) == + model_loader->tensors_map.name_to_idx.end()) { + fprintf(stderr, "%s: error: tensor '%s' not found in base model\n", + __func__, base_name.c_str()); + return 1; + } + size_t idx = model_loader->tensors_map.name_to_idx[base_name]; + llama_load_tensor < = model_loader->tensors_map.tensors[idx]; + base_t = model_loader->get_tensor( + base_name, {(uint32_t)dest_t->ne[0], (uint32_t)dest_t->ne[1]}, + GGML_BACKEND_CPU); + lt.data = (uint8_t *)lt.ggml_tensor->data; + model_loader->load_data_for(lt); + lt.ggml_tensor->data = lt.data; + } else { + base_t = dest_t; + } + + if (ggml_is_quantized(base_t->type)) { + if (!warned) { + fprintf(stderr, + "%s: warning: using a lora adapter with a quantized model " + "may result in poor quality, " + "use a f16 or f32 base model with --lora-base\n", + __func__); + warned = true; + } + } + + ggml_tensor *loraA = lora_tensors[base_name + ".loraA"]; + ggml_tensor *loraB = lora_tensors[base_name + ".loraB"]; + + if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) { + fprintf(stderr, + "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 + ");" + " are you sure that this adapter is for this model?\n", + __func__, base_t->ne[0], loraA->ne[1]); + return 1; + } + + // w = w + BA*s + ggml_tensor *BA = ggml_mul_mat(lora_ctx, loraA, loraB); + + if (scaling != 1.0f) { + ggml_tensor *scale_tensor = ggml_new_f32(lora_ctx, scaling); + BA = ggml_scale_inplace(lora_ctx, BA, scale_tensor); + } + + ggml_tensor *r; + if (base_t == dest_t) { + r = ggml_add_inplace(lora_ctx, dest_t, BA); + } else { + r = ggml_add(lora_ctx, base_t, BA); + r = ggml_cpy(lora_ctx, r, dest_t); + } + + struct ggml_cgraph gf = ggml_build_forward(r); + gf.n_threads = n_threads; + ggml_graph_compute(lora_ctx, &gf); + + // we won't need these tensors again, reset the context to save memory + ggml_free(lora_ctx); + lora_ctx = ggml_init(params); + lora_tensors.clear(); + + n_tensors++; + if (n_tensors % 4 == 0) { + fprintf(stderr, "."); + } + } + } + + // TODO: this should be in a destructor, it will leak on failure + ggml_free(lora_ctx); + if (base_ctx) { + ggml_free(base_ctx); + } + + const int64_t t_lora_us = ggml_time_us() - t_start_lora_us; + fprintf(stderr, " done (%.2f ms)\n", t_lora_us / 1000.0); + + return 0; +} + +int falcon_apply_lora_from_file(struct falcon_context *ctx, + const char *path_lora, + const char *path_base_model, int n_threads) { + try { + return falcon_apply_lora_from_file_internal(ctx, path_lora, path_base_model, + n_threads); + } catch (const std::exception &err) { + fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, + err.what()); + return 1; + } +} + +int falcon_get_kv_cache_token_count(const struct falcon_context *ctx) { + return ctx->model.kv_self.n; +} + +#define LLAMA_MAX_RNG_STATE (64 * 1024) + +void falcon_set_rng_seed(struct falcon_context *ctx, int seed) { + if (seed < 0) { + seed = time(NULL); + } + ctx->rng.seed(seed); +} + +// Returns the *maximum* size of the state +size_t falcon_get_state_size(const struct falcon_context *ctx) { + // we don't know size of rng until we actually serialize it. so reserve more + // than enough memory for its serialized state. for reference, + // std::mt19937(1337) serializes to 6701 bytes. + const size_t s_rng_size = sizeof(size_t); + const size_t s_rng = LLAMA_MAX_RNG_STATE; + const size_t s_logits_capacity = sizeof(size_t); + const size_t s_logits_size = sizeof(size_t); + const size_t s_logits = ctx->logits.capacity() * sizeof(float); + const size_t s_embedding_size = sizeof(size_t); + const size_t s_embedding = ctx->embedding.size() * sizeof(float); + const size_t s_kv_size = sizeof(size_t); + const size_t s_kv_ntok = sizeof(int); + const size_t s_kv = ctx->model.kv_self.buf.size; + + const size_t s_total = + (+s_rng_size + s_rng + s_logits_capacity + s_logits_size + s_logits + + s_embedding_size + s_embedding + s_kv_size + s_kv_ntok + s_kv); + + return s_total; +} + +// Copies the state to the specified destination address +size_t falcon_copy_state_data(struct falcon_context *ctx, uint8_t *dst) { + uint8_t *out = dst; + + // copy rng + { + std::stringstream rng_ss; + rng_ss << ctx->rng; + + const size_t rng_size = rng_ss.str().size(); + char rng_buf[LLAMA_MAX_RNG_STATE]; + + memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE); + memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size()); + + memcpy(out, &rng_size, sizeof(rng_size)); + out += sizeof(rng_size); + memcpy(out, &rng_buf[0], LLAMA_MAX_RNG_STATE); + out += LLAMA_MAX_RNG_STATE; + } + + // copy logits + { + const size_t logits_cap = ctx->logits.capacity(); + const size_t logits_size = ctx->logits.size(); + + memcpy(out, &logits_cap, sizeof(logits_cap)); + out += sizeof(logits_cap); + memcpy(out, &logits_size, sizeof(logits_size)); + out += sizeof(logits_size); + + if (logits_size) { + memcpy(out, ctx->logits.data(), logits_size * sizeof(float)); + } + + out += logits_cap * sizeof(float); + } + + // copy embeddings + { + const size_t embedding_size = ctx->embedding.size(); + + memcpy(out, &embedding_size, sizeof(embedding_size)); + out += sizeof(embedding_size); + + if (embedding_size) { + memcpy(out, ctx->embedding.data(), embedding_size * sizeof(float)); + out += embedding_size * sizeof(float); + } + } + + // copy kv cache + { + const auto &kv_self = ctx->model.kv_self; + const auto &hparams = ctx->model.hparams; + const int n_layer = hparams.n_layer; + const int n_embd = hparams.n_embd; + const int n_ctx = hparams.n_ctx; + + const size_t kv_size = kv_self.buf.size; + const int kv_ntok = falcon_get_kv_cache_token_count(ctx); + + memcpy(out, &kv_size, sizeof(kv_size)); + out += sizeof(kv_size); + memcpy(out, &kv_ntok, sizeof(kv_ntok)); + out += sizeof(kv_ntok); + + if (kv_size) { + const size_t elt_size = ggml_element_size(kv_self.k); + + char buffer[4096]; + + ggml_context *cpy_ctx = + ggml_init({sizeof(buffer), buffer, /* no_alloc */ true}); + ggml_cgraph gf{}; + gf.n_threads = 1; + + ggml_tensor *kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, + kv_ntok, n_layer); + kout3d->data = out; + out += ggml_nbytes(kout3d); + + ggml_tensor *vout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, + kv_ntok, n_embd, n_layer); + vout3d->data = out; + out += ggml_nbytes(vout3d); + + ggml_tensor *k3d = + ggml_view_3d(cpy_ctx, kv_self.k, n_embd, kv_ntok, n_layer, + elt_size * n_embd, elt_size * n_embd * n_ctx, 0); + + ggml_tensor *v3d = + ggml_view_3d(cpy_ctx, kv_self.v, kv_ntok, n_embd, n_layer, + elt_size * n_ctx, elt_size * n_ctx * n_embd, 0); + + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d)); + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d)); + ggml_graph_compute(cpy_ctx, &gf); + + ggml_free(cpy_ctx); + } + } + + const size_t written = out - dst; + const size_t max_size = falcon_get_state_size(ctx); + + LLAMA_ASSERT(written <= max_size); + + return written; +} + +// Sets the state reading from the specified source address +size_t falcon_set_state_data(struct falcon_context *ctx, uint8_t *src) { + uint8_t *inp = src; + + // set rng + { + size_t rng_size; + char rng_buf[LLAMA_MAX_RNG_STATE]; + + memcpy(&rng_size, inp, sizeof(rng_size)); + inp += sizeof(rng_size); + memcpy(&rng_buf[0], inp, LLAMA_MAX_RNG_STATE); + inp += LLAMA_MAX_RNG_STATE; + + std::stringstream rng_ss; + rng_ss.str(std::string(&rng_buf[0], rng_size)); + rng_ss >> ctx->rng; + + LLAMA_ASSERT(rng_ss.fail() == false); + } + + // set logits + { + size_t logits_cap; + size_t logits_size; + + memcpy(&logits_cap, inp, sizeof(logits_cap)); + inp += sizeof(logits_cap); + memcpy(&logits_size, inp, sizeof(logits_size)); + inp += sizeof(logits_size); + + LLAMA_ASSERT(ctx->logits.capacity() == logits_cap); + + if (logits_size) { + ctx->logits.resize(logits_size); + memcpy(ctx->logits.data(), inp, logits_size * sizeof(float)); + } + + inp += logits_cap * sizeof(float); + } + + // set embeddings + { + size_t embedding_size; + + memcpy(&embedding_size, inp, sizeof(embedding_size)); + inp += sizeof(embedding_size); + + LLAMA_ASSERT(ctx->embedding.capacity() == embedding_size); + + if (embedding_size) { + memcpy(ctx->embedding.data(), inp, embedding_size * sizeof(float)); + inp += embedding_size * sizeof(float); + } + } + + // set kv cache + { + const auto &kv_self = ctx->model.kv_self; + const auto &hparams = ctx->model.hparams; + const int n_layer = hparams.n_layer; + const int n_embd = hparams.n_embd; + const int n_ctx = hparams.n_ctx; + + size_t kv_size; + int kv_ntok; + + memcpy(&kv_size, inp, sizeof(kv_size)); + inp += sizeof(kv_size); + memcpy(&kv_ntok, inp, sizeof(kv_ntok)); + inp += sizeof(kv_ntok); + + if (kv_size) { + LLAMA_ASSERT(kv_self.buf.size == kv_size); + + const size_t elt_size = ggml_element_size(kv_self.k); + + char buffer[4096]; + + ggml_context *cpy_ctx = + ggml_init({sizeof(buffer), buffer, /* no_alloc */ true}); + ggml_cgraph gf{}; + gf.n_threads = 1; + + ggml_tensor *kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, + kv_ntok, n_layer); + kin3d->data = (void *)inp; + inp += ggml_nbytes(kin3d); + + ggml_tensor *vin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, kv_ntok, + n_embd, n_layer); + vin3d->data = (void *)inp; + inp += ggml_nbytes(vin3d); + + ggml_tensor *k3d = + ggml_view_3d(cpy_ctx, kv_self.k, n_embd, kv_ntok, n_layer, + elt_size * n_embd, elt_size * n_embd * n_ctx, 0); + + ggml_tensor *v3d = + ggml_view_3d(cpy_ctx, kv_self.v, kv_ntok, n_embd, n_layer, + elt_size * n_ctx, elt_size * n_ctx * n_embd, 0); + + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d)); + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d)); + ggml_graph_compute(cpy_ctx, &gf); + + ggml_free(cpy_ctx); + } + + ctx->model.kv_self.n = kv_ntok; + } + + const size_t nread = inp - src; + const size_t max_size = falcon_get_state_size(ctx); + + LLAMA_ASSERT(nread <= max_size); + + return nread; +} + +bool falcon_load_session_file(struct falcon_context *ctx, + const char *path_session, llama_token *tokens_out, + size_t n_token_capacity, + size_t *n_token_count_out) { + llama_file file(path_session, "rb"); + + // sanity checks + { + const uint32_t magic = file.read_u32(); + const uint32_t version = file.read_u32(); + + if (magic != LLAMA_SESSION_MAGIC || version != LLAMA_SESSION_VERSION) { + fprintf(stderr, + "%s : unknown (magic, version) for session file: %08x, %08x\n", + __func__, magic, version); + return false; + } + + falcon_hparams session_hparams; + file.read_raw(&session_hparams, sizeof(falcon_hparams)); + + if (session_hparams != ctx->model.hparams) { + fprintf(stderr, "%s : model hparams didn't match from session file!\n", + __func__); + return false; + } + } + + // load the prompt + { + const uint32_t n_token_count = file.read_u32(); + + if (n_token_count > n_token_capacity) { + fprintf(stderr, + "%s : token count in session file exceeded capacity! %u > %zu\n", + __func__, n_token_count, n_token_capacity); + return false; + } + + file.read_raw(tokens_out, sizeof(llama_token) * n_token_count); + *n_token_count_out = n_token_count; + } + + // restore the context state + { + const size_t n_state_size_cur = file.size - file.tell(); + const size_t n_state_size_max = falcon_get_state_size(ctx); + + if (n_state_size_cur > n_state_size_max) { + fprintf( + stderr, + "%s : the state size in session file is too big! max %zu, got %zu\n", + __func__, n_state_size_max, n_state_size_cur); + return false; + } + + std::vector state_data(n_state_size_max); + file.read_raw(state_data.data(), n_state_size_cur); + + falcon_set_state_data(ctx, state_data.data()); + } + + return true; +} + +bool falcon_save_session_file(struct falcon_context *ctx, + const char *path_session, + const llama_token *tokens, size_t n_token_count) { + llama_file file(path_session, "wb"); + + file.write_u32(LLAMA_SESSION_MAGIC); + file.write_u32(LLAMA_SESSION_VERSION); + + file.write_raw(&ctx->model.hparams, sizeof(falcon_hparams)); + + // save the prompt + file.write_u32((uint32_t)n_token_count); + file.write_raw(tokens, sizeof(llama_token) * n_token_count); + + // save the context state + { + const size_t n_state_size_max = falcon_get_state_size(ctx); + + std::vector state_data(n_state_size_max); + const size_t n_state_size_cur = + falcon_copy_state_data(ctx, state_data.data()); + + file.write_raw(state_data.data(), n_state_size_cur); + } + + return true; +} + +int falcon_eval(struct falcon_context *ctx, const llama_token *tokens, + int n_tokens, int n_past, int n_threads, int debug_timings) { + if (!falcon_eval_internal(*ctx, tokens, n_tokens, n_past, n_threads, nullptr, + debug_timings)) { + fprintf(stderr, "%s: failed to eval\n", __func__); + return 1; + } + + // get a more accurate load time, upon first eval + // TODO: fix this + if (!ctx->has_evaluated_once) { + ctx->t_load_us = ggml_time_us() - ctx->t_start_us; + ctx->has_evaluated_once = true; + } + + return 0; +} + +int falcon_eval_export(struct falcon_context *ctx, const char *fname) { + const int n_batch = 1; + const int n_ctx = 512 - n_batch; + + const std::vector tmp(n_batch, falcon_token_bos()); + + if (!falcon_eval_internal(*ctx, tmp.data(), tmp.size(), n_ctx, 1, fname, 0)) { + fprintf(stderr, "%s: failed to eval\n", __func__); + return 1; + } + + return 0; +} + +int falcon_tokenize(struct falcon_context *ctx, const char *text, + llama_token *tokens, int n_max_tokens, bool add_bos) { + auto res = falcon_tokenize(ctx->vocab, text, add_bos); + + if (n_max_tokens < (int)res.size()) { + fprintf(stderr, "%s: too many tokens\n", __func__); + return -((int)res.size()); + } + + for (size_t i = 0; i < res.size(); i++) { + tokens[i] = res[i]; + } + + return res.size(); +} + +int falcon_n_vocab(const struct falcon_context *ctx) { + return ctx->vocab.id_to_token.size(); +} + +int falcon_n_ctx(const struct falcon_context *ctx) { + return ctx->model.hparams.n_ctx; +} + +int falcon_n_embd(const struct falcon_context *ctx) { + return ctx->model.hparams.n_embd; +} + +int falcon_get_vocab(const struct falcon_context *ctx, const char **strings, + float *scores, int capacity) { + int n = std::min(capacity, (int)ctx->vocab.id_to_token.size()); + for (int i = 0; i < n; ++i) { + strings[i] = ctx->vocab.id_to_token[i].tok.c_str(); + scores[i] = ctx->vocab.id_to_token[i].score; + } + return n; +} + +float *falcon_get_logits(struct falcon_context *ctx) { + return ctx->logits.data(); +} + +float *falcon_get_embeddings(struct falcon_context *ctx) { + return ctx->embedding.data(); +} + +const char *falcon_token_to_str(const struct falcon_context *ctx, + llama_token token) { + if (token >= falcon_n_vocab(ctx)) { + return nullptr; + } + + return ctx->vocab.id_to_token[token].tok.c_str(); +} + +llama_token falcon_token_bos() { return 11; } + +llama_token falcon_token_eos() { return 11; } + +llama_token falcon_token_nl() { return 193; } + +llama_token falcon_token_cr() { return 195; } + +void falcon_print_timings(struct falcon_context *ctx) { + const int64_t t_end_us = ggml_time_us(); + + const int32_t n_sample = std::max(1, ctx->n_sample); + const int32_t n_eval = std::max(1, ctx->n_eval); + const int32_t n_p_eval = std::max(1, ctx->n_p_eval); + + fprintf(stderr, "\n"); + fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, + ctx->t_load_us / 1000.0); + fprintf(stderr, + "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token)\n", + __func__, 1e-3 * ctx->t_sample_us, n_sample, + 1e-3 * ctx->t_sample_us / n_sample); + fprintf(stderr, + "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token)\n", + __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, + 1e-3 * ctx->t_p_eval_us / n_p_eval); + fprintf(stderr, + "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token)\n", + __func__, 1e-3 * ctx->t_eval_us, n_eval, + 1e-3 * ctx->t_eval_us / n_eval); + fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, + (t_end_us - ctx->t_start_us) / 1000.0); +} + +void falcon_reset_timings(struct falcon_context *ctx) { + ctx->t_start_us = ggml_time_us(); + ctx->t_sample_us = ctx->n_sample = 0; + ctx->t_eval_us = ctx->n_eval = 0; + ctx->t_p_eval_us = ctx->n_p_eval = 0; +} + +const char *falcon_print_system_info(void) { + static std::string s; + + s = ""; + s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | "; + s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | "; + s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | "; + s += "AVX512_VBMI = " + std::to_string(ggml_cpu_has_avx512_vbmi()) + " | "; + s += "AVX512_VNNI = " + std::to_string(ggml_cpu_has_avx512_vnni()) + " | "; + s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | "; + s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | "; + s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | "; + s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | "; + s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | "; + s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | "; + s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | "; + s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | "; + s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | "; + + return s.c_str(); +} + +// For internal test use +std::vector> + &llama_internal_get_tensor_map(struct falcon_context *ctx) { + return ctx->model.tensors_by_name; +} diff --git a/ctransformers/models/ggml/libfalcon.h b/ctransformers/models/ggml/libfalcon.h new file mode 100644 index 0000000000000000000000000000000000000000..2342214ab2920c26a372dafb5d840b28f96c705f --- /dev/null +++ b/ctransformers/models/ggml/libfalcon.h @@ -0,0 +1,334 @@ +#ifndef FALCON_H +#define FALCON_H + +#include "ggml.h" +#ifdef GGML_USE_CUBLAS +#include "ggml-cuda.h" +#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES +#else +#define LLAMA_MAX_DEVICES 1 +#endif // GGML_USE_CUBLAS +#include +#include +#include + +#ifdef LLAMA_SHARED +#if defined(_WIN32) && !defined(__MINGW32__) +#ifdef LLAMA_BUILD +#define LLAMA_API __declspec(dllexport) +#else +#define LLAMA_API __declspec(dllimport) +#endif +#else +#define LLAMA_API __attribute__((visibility("default"))) +#endif +#else +#define LLAMA_API +#endif + +#define LLAMA_FILE_MAGIC_GGJT 0x67676a74u // 'ggjt' +#define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla' +#define LLAMA_FILE_MAGIC_GGMF 0x67676d66u // 'ggmf' +#define LLAMA_FILE_MAGIC_GGML 0x67676d6cu // 'ggml' +#define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn' + +#define LLAMA_FILE_VERSION 3 +#define LLAMA_FILE_MAGIC LLAMA_FILE_MAGIC_GGJT +#define LLAMA_FILE_MAGIC_UNVERSIONED LLAMA_FILE_MAGIC_GGML +#define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN +#define LLAMA_SESSION_VERSION 1 + +#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || \ + defined(GGML_USE_METAL) +// Defined when llama.cpp is compiled with support for offloading model layers +// to GPU. +#define LLAMA_SUPPORTS_GPU_OFFLOAD +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// +// C interface +// +// TODO: show sample usage +// + +struct falcon_context; + +struct falcon_context_params { + int n_ctx; // text context + int n_batch; // prompt processing batch size + int n_gpu_layers; // number of layers to store in VRAM + int i_gpu_start; // first gpu layer + int i_gpu_last; // last gpu layer + int main_gpu; // the GPU that is used for scratch and small tensors + float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple + // GPUs + int seed; // RNG seed, -1 for random + + bool f16_kv; // use fp16 for KV cache + bool logits_all; // the llama_eval() call computes all logits, not just the + // last one + bool vocab_only; // only load the vocabulary, no weights + bool use_mmap; // use mmap if possible + bool use_mlock; // force system to keep model in RAM + bool embedding; // embedding mode only + + // called with a progress value between 0 and 1, pass NULL to disable + llama_progress_callback progress_callback; + // context pointer passed to the progress callback + void *progress_callback_user_data; +}; + +// model quantization parameters +typedef struct falcon_model_quantize_params { + int nthread; // number of threads to use for quantizing, if <=0 will use + // std::thread::hardware_concurrency() + enum llama_ftype ftype; // quantize to this llama_ftype + bool allow_requantize; // allow quantizing non-f32/f16 tensors + bool quantize_output_tensor; // quantize output.weight +} falcon_model_quantize_params; + +LLAMA_API struct falcon_context_params falcon_context_default_params(); +LLAMA_API struct falcon_model_quantize_params +falcon_model_quantize_default_params(); + +// Various functions for loading a ggml llama model. +// Allocate (almost) all memory needed for the model. +// Return NULL on failure +LLAMA_API struct falcon_context *falcon_init_from_file( + const char *path_model, struct falcon_context_params params); + +// Frees all allocated memory +LLAMA_API void falcon_free(struct falcon_context *ctx); + +// Returns 0 on success +LLAMA_API int falcon_model_quantize(const char *fname_inp, + const char *fname_out, + const falcon_model_quantize_params *params); + +// Apply a LoRA adapter to a loaded model +// path_base_model is the path to a higher quality model to use as a base for +// the layers modified by the adapter. Can be NULL to use the current loaded +// model. The model needs to be reloaded before applying a new adapter, +// otherwise the adapter will be applied on top of the previous one Returns 0 on +// success +LLAMA_API int falcon_apply_lora_from_file(struct falcon_context *ctx, + const char *path_lora, + const char *path_base_model, + int n_threads); + +// Returns the number of tokens in the KV cache +LLAMA_API int falcon_get_kv_cache_token_count(const struct falcon_context *ctx); + +// Sets the current rng seed. +LLAMA_API void falcon_set_rng_seed(struct falcon_context *ctx, int seed); + +// Returns the maximum size in bytes of the state (rng, logits, embedding +// and kv_cache) - will often be smaller after compacting tokens +LLAMA_API size_t falcon_get_state_size(const struct falcon_context *ctx); + +// Copies the state to the specified destination address. +// Destination needs to have allocated enough memory. +// Returns the number of bytes copied +LLAMA_API size_t falcon_copy_state_data(struct falcon_context *ctx, + uint8_t *dst); + +// Set the state reading from the specified address +// Returns the number of bytes read +LLAMA_API size_t falcon_set_state_data(struct falcon_context *ctx, + uint8_t *src); + +// Save/load session file +LLAMA_API bool falcon_load_session_file(struct falcon_context *ctx, + const char *path_session, + llama_token *tokens_out, + size_t n_token_capacity, + size_t *n_token_count_out); +LLAMA_API bool falcon_save_session_file(struct falcon_context *ctx, + const char *path_session, + const llama_token *tokens, + size_t n_token_count); + +// Run the llama inference to obtain the logits and probabilities for the next +// token. tokens + n_tokens is the provided batch of new tokens to process +// n_past is the number of tokens to use from previous eval calls +// Returns 0 on success +LLAMA_API int falcon_eval(struct falcon_context *ctx, const llama_token *tokens, + int n_tokens, int n_past, int n_threads, + int debug_timings); + +// Export a static computation graph for context of 511 and batch size of 1 +// NOTE: since this functionality is mostly for debugging and demonstration +// purposes, we hardcode these +// parameters here to keep things simple +// IMPORTANT: do not use for anything else other than debugging and testing! +LLAMA_API int falcon_eval_export(struct falcon_context *ctx, const char *fname); + +// Convert the provided text into tokens. +// The tokens pointer must be large enough to hold the resulting tokens. +// Returns the number of tokens on success, no more than n_max_tokens +// Returns a negative number on failure - the number of tokens that would have +// been returned +// TODO: not sure if correct +LLAMA_API int falcon_tokenize(struct falcon_context *ctx, const char *text, + llama_token *tokens, int n_max_tokens, + bool add_bos); + +LLAMA_API int falcon_n_vocab(const struct falcon_context *ctx); +LLAMA_API int falcon_n_ctx(const struct falcon_context *ctx); +LLAMA_API int falcon_n_embd(const struct falcon_context *ctx); + +// Get the vocabulary as output parameters. +// Returns number of results. +LLAMA_API int falcon_get_vocab(const struct falcon_context *ctx, + const char **strings, float *scores, + int capacity); + +// Token logits obtained from the last call to llama_eval() +// The logits for the last token are stored in the last row +// Can be mutated in order to change the probabilities of the next token +// Rows: n_tokens +// Cols: n_vocab +LLAMA_API float *falcon_get_logits(struct falcon_context *ctx); + +// Get the embeddings for the input +// shape: [n_embd] (1-dimensional) +LLAMA_API float *falcon_get_embeddings(struct falcon_context *ctx); + +// Token Id -> String. Uses the vocabulary in the provided context +LLAMA_API const char *falcon_token_to_str(const struct falcon_context *ctx, + llama_token token); + +// Special tokens +LLAMA_API llama_token falcon_token_bos(); +LLAMA_API llama_token falcon_token_eos(); +LLAMA_API llama_token falcon_token_nl(); + +// Sampling functions + +/// @details Repetition penalty described in CTRL academic paper +/// https://arxiv.org/abs/1909.05858, with negative logit fix. +LLAMA_API void falcon_sample_repetition_penalty( + struct falcon_context *ctx, llama_token_data_array *candidates, + const llama_token *last_tokens, size_t last_tokens_size, float penalty); + +/// @details Frequency and presence penalties described in OpenAI API +/// https://platform.openai.com/docs/api-reference/parameter-details. +LLAMA_API void falcon_sample_frequency_and_presence_penalties( + struct falcon_context *ctx, llama_token_data_array *candidates, + const llama_token *last_tokens, size_t last_tokens_size, + float alpha_frequency, float alpha_presence); + +/// @details Sorts candidate tokens by their logits in descending order and +/// calculate probabilities based on logits. +LLAMA_API void falcon_sample_softmax(struct falcon_context *ctx, + llama_token_data_array *candidates); + +/// @details Top-K sampling described in academic paper "The Curious Case of +/// Neural Text Degeneration" https://arxiv.org/abs/1904.09751 +LLAMA_API void falcon_sample_top_k(struct falcon_context *ctx, + llama_token_data_array *candidates, int k, + size_t min_keep); + +/// @details Nucleus sampling described in academic paper "The Curious Case of +/// Neural Text Degeneration" https://arxiv.org/abs/1904.09751 +LLAMA_API void falcon_sample_top_p(struct falcon_context *ctx, + llama_token_data_array *candidates, float p, + size_t min_keep); + +/// @details Tail Free Sampling described in +/// https://www.trentonbricken.com/Tail-Free-Sampling/. +LLAMA_API void falcon_sample_tail_free(struct falcon_context *ctx, + llama_token_data_array *candidates, + float z, size_t min_keep); + +/// @details Locally Typical Sampling implementation described in the paper +/// https://arxiv.org/abs/2202.00666. +LLAMA_API void falcon_sample_typical(struct falcon_context *ctx, + llama_token_data_array *candidates, + float p, size_t min_keep); +LLAMA_API void falcon_sample_temperature(struct falcon_context *ctx, + llama_token_data_array *candidates, + float temp); + +/// @details Mirostat 1.0 algorithm described in the paper +/// https://arxiv.org/abs/2007.14966. Uses tokens instead of words. +/// @param candidates A vector of `llama_token_data` containing the candidate +/// tokens, their probabilities (p), and log-odds (logit) for the current +/// position in the generated text. +/// @param tau The target cross-entropy (or surprise) value you want to achieve +/// for the generated text. A higher value corresponds to more surprising or +/// less predictable text, while a lower value corresponds to less surprising or +/// more predictable text. +/// @param eta The learning rate used to update `mu` based on the error between +/// the target and observed surprisal of the sampled word. A larger learning +/// rate will cause `mu` to be updated more quickly, while a smaller learning +/// rate will result in slower updates. +/// @param m The number of tokens considered in the estimation of `s_hat`. This +/// is an arbitrary value that is used to calculate `s_hat`, which in turn helps +/// to calculate the value of `k`. In the paper, they use `m = 100`, but you can +/// experiment with different values to see how it affects the performance of +/// the algorithm. +/// @param mu Maximum cross-entropy. This value is initialized to be twice the +/// target cross-entropy (`2 * tau`) and is updated in the algorithm based on +/// the error between the target and observed surprisal. +LLAMA_API llama_token falcon_sample_token_mirostat( + struct falcon_context *ctx, llama_token_data_array *candidates, float tau, + float eta, int m, float *mu); + +/// @details Mirostat 2.0 algorithm described in the paper +/// https://arxiv.org/abs/2007.14966. Uses tokens instead of words. +/// @param candidates A vector of `llama_token_data` containing the candidate +/// tokens, their probabilities (p), and log-odds (logit) for the current +/// position in the generated text. +/// @param tau The target cross-entropy (or surprise) value you want to achieve +/// for the generated text. A higher value corresponds to more surprising or +/// less predictable text, while a lower value corresponds to less surprising or +/// more predictable text. +/// @param eta The learning rate used to update `mu` based on the error between +/// the target and observed surprisal of the sampled word. A larger learning +/// rate will cause `mu` to be updated more quickly, while a smaller learning +/// rate will result in slower updates. +/// @param mu Maximum cross-entropy. This value is initialized to be twice the +/// target cross-entropy (`2 * tau`) and is updated in the algorithm based on +/// the error between the target and observed surprisal. +LLAMA_API llama_token falcon_sample_token_mirostat_v2( + struct falcon_context *ctx, llama_token_data_array *candidates, float tau, + float eta, float *mu); + +/// @details Selects the token with the highest probability. +LLAMA_API llama_token falcon_sample_token_greedy( + struct falcon_context *ctx, llama_token_data_array *candidates); + +/// @details Randomly selects a token from the candidates based on their +/// probabilities. +LLAMA_API llama_token falcon_sample_token(struct falcon_context *ctx, + llama_token_data_array *candidates); + +// Performance information +LLAMA_API void falcon_print_timings(struct falcon_context *ctx); +LLAMA_API void falcon_reset_timings(struct falcon_context *ctx); + +// Print system information +LLAMA_API const char *falcon_print_system_info(void); + +#ifdef __cplusplus +} +#endif + +// Internal API to be implemented by llama.cpp and used by tests/benchmarks only +#ifdef LLAMA_API_INTERNAL + +#include +#include +struct ggml_tensor; + +std::vector> + &llama_internal_get_tensor_map(struct falcon_context *ctx); + +#endif + +#endif diff --git a/ctransformers/models/ggml/llama-util.h b/ctransformers/models/ggml/llama-util.h new file mode 100644 index 0000000000000000000000000000000000000000..4f8a4296adc4eaf3d66cd453097cd2ab82db72c9 --- /dev/null +++ b/ctransformers/models/ggml/llama-util.h @@ -0,0 +1,490 @@ +// Internal header to be included only by llama.cpp. +// Contains wrappers around OS interfaces. + +#ifndef LLAMA_UTIL_H +#define LLAMA_UTIL_H + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef __has_include + #if __has_include() + #include + #if defined(_POSIX_MAPPED_FILES) + #include + #endif + #if defined(_POSIX_MEMLOCK_RANGE) + #include + #endif + #endif +#endif + +#if defined(_WIN32) + #define WIN32_LEAN_AND_MEAN + #ifndef NOMINMAX + #define NOMINMAX + #endif + #include + #include + #include // for _fseeki64 +#endif + +#define LLAMA_ASSERT(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "LLAMA_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \ + abort(); \ + } \ + } while (0) + +#ifdef __GNUC__ +#ifdef __MINGW32__ +__attribute__((format(gnu_printf, 1, 2))) +#else +__attribute__((format(printf, 1, 2))) +#endif +#endif +static std::string format(const char * fmt, ...) { + va_list ap, ap2; + va_start(ap, fmt); + va_copy(ap2, ap); + int size = vsnprintf(NULL, 0, fmt, ap); + LLAMA_ASSERT(size >= 0 && size < INT_MAX); + std::vector buf(size + 1); + int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2); + LLAMA_ASSERT(size2 == size); + va_end(ap2); + va_end(ap); + return std::string(buf.data(), size); +} + +struct llama_file { + // use FILE * so we don't have to re-open the file to mmap + FILE * fp; + size_t size; + + llama_file(const char * fname, const char * mode) { + fp = std::fopen(fname, mode); + if (fp == NULL) { + throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno))); + } + seek(0, SEEK_END); + size = tell(); + seek(0, SEEK_SET); + } + + size_t tell() const { +#ifdef _WIN32 + __int64 ret = _ftelli64(fp); +#else + long ret = std::ftell(fp); +#endif + LLAMA_ASSERT(ret != -1); // this really shouldn't fail + return (size_t) ret; + } + + void seek(size_t offset, int whence) { +#ifdef _WIN32 + int ret = _fseeki64(fp, (__int64) offset, whence); +#else + int ret = std::fseek(fp, (long) offset, whence); +#endif + LLAMA_ASSERT(ret == 0); // same + } + + void read_raw(void * ptr, size_t len) const { + if (len == 0) { + return; + } + errno = 0; + std::size_t ret = std::fread(ptr, len, 1, fp); + if (ferror(fp)) { + throw std::runtime_error(format("read error: %s", strerror(errno))); + } + if (ret != 1) { + throw std::runtime_error(std::string("unexpectedly reached end of file")); + } + } + + std::uint32_t read_u32() { + std::uint32_t ret; + read_raw(&ret, sizeof(ret)); + return ret; + } + + std::string read_string(std::uint32_t len) { + std::vector chars(len); + read_raw(chars.data(), len); + return std::string(chars.data(), len); + } + + void write_raw(const void * ptr, size_t len) const { + if (len == 0) { + return; + } + errno = 0; + size_t ret = std::fwrite(ptr, len, 1, fp); + if (ret != 1) { + throw std::runtime_error(format("write error: %s", strerror(errno))); + } + } + + void write_u32(std::uint32_t val) { + write_raw(&val, sizeof(val)); + } + + ~llama_file() { + if (fp) { + std::fclose(fp); + } + } +}; + +#if defined(_WIN32) +static std::string llama_format_win_err(DWORD err) { + LPSTR buf; + size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL); + if (!size) { + return "FormatMessageA failed"; + } + std::string ret(buf, size); + LocalFree(buf); + return ret; +} +#endif + +struct llama_mmap { + void * addr; + size_t size; + + llama_mmap(const llama_mmap &) = delete; + +#ifdef _POSIX_MAPPED_FILES + static constexpr bool SUPPORTED = true; + + llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) { + size = file->size; + int fd = fileno(file->fp); + int flags = MAP_SHARED; +#ifdef __linux__ + flags |= MAP_POPULATE; +#endif + addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0); + if (addr == MAP_FAILED) { + throw std::runtime_error(format("mmap failed: %s", strerror(errno))); + } + + if (prefetch > 0) { + // Advise the kernel to preload the mapped memory + if (madvise(addr, std::min(file->size, prefetch), MADV_WILLNEED)) { + fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n", + strerror(errno)); + } + } + } + + ~llama_mmap() { + munmap(addr, size); + } +#elif defined(_WIN32) + static constexpr bool SUPPORTED = true; + + llama_mmap(struct llama_file * file, bool prefetch = true) { + size = file->size; + + HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp)); + + HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL); + DWORD error = GetLastError(); + + if (hMapping == NULL) { + throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str())); + } + + addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0); + error = GetLastError(); + CloseHandle(hMapping); + + if (addr == NULL) { + throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str())); + } + + #if _WIN32_WINNT >= _WIN32_WINNT_WIN8 + if (prefetch) { + // Advise the kernel to preload the mapped memory + WIN32_MEMORY_RANGE_ENTRY range; + range.VirtualAddress = addr; + range.NumberOfBytes = (SIZE_T)size; + if (!PrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) { + fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n", + llama_format_win_err(GetLastError()).c_str()); + } + } + #else + #pragma message("warning: You are building for pre-Windows 8; prefetch not supported") + #endif // _WIN32_WINNT >= _WIN32_WINNT_WIN8 + } + + ~llama_mmap() { + if (!UnmapViewOfFile(addr)) { + fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n", + llama_format_win_err(GetLastError()).c_str()); + } + } +#else + static constexpr bool SUPPORTED = false; + + llama_mmap(struct llama_file *, bool prefetch = true) { + (void)prefetch; + throw std::runtime_error(std::string("mmap not supported")); + } +#endif +}; + +// Represents some region of memory being locked using mlock or VirtualLock; +// will automatically unlock on destruction. +struct llama_mlock { + void * addr = NULL; + size_t size = 0; + bool failed_already = false; + + llama_mlock() {} + llama_mlock(const llama_mlock &) = delete; + + ~llama_mlock() { + if (size) { + raw_unlock(addr, size); + } + } + + void init(void * ptr) { + LLAMA_ASSERT(addr == NULL && size == 0); + addr = ptr; + } + + void grow_to(size_t target_size) { + LLAMA_ASSERT(addr); + if (failed_already) { + return; + } + size_t granularity = lock_granularity(); + target_size = (target_size + granularity - 1) & ~(granularity - 1); + if (target_size > size) { + if (raw_lock((uint8_t *) addr + size, target_size - size)) { + size = target_size; + } else { + failed_already = true; + } + } + } + +#ifdef _POSIX_MEMLOCK_RANGE + static constexpr bool SUPPORTED = true; + + size_t lock_granularity() { + return (size_t) sysconf(_SC_PAGESIZE); + } + + #ifdef __APPLE__ + #define MLOCK_SUGGESTION \ + "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \ + "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l).\n" + #else + #define MLOCK_SUGGESTION \ + "Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n" + #endif + + bool raw_lock(const void * addr, size_t size) { + if (!mlock(addr, size)) { + return true; + } else { + char* errmsg = std::strerror(errno); + bool suggest = (errno == ENOMEM); + + // Check if the resource limit is fine after all + struct rlimit lock_limit; + if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) + suggest = false; + if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) + suggest = false; + + fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s", + size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : ""); + return false; + } + } + + #undef MLOCK_SUGGESTION + + void raw_unlock(void * addr, size_t size) { + if (munlock(addr, size)) { + fprintf(stderr, "warning: failed to munlock buffer: %s\n", std::strerror(errno)); + } + } +#elif defined(_WIN32) + static constexpr bool SUPPORTED = true; + + size_t lock_granularity() { + SYSTEM_INFO si; + GetSystemInfo(&si); + return (size_t) si.dwPageSize; + } + + bool raw_lock(void * ptr, size_t len) { + for (int tries = 1; ; tries++) { + if (VirtualLock(ptr, len)) { + return true; + } + if (tries == 2) { + fprintf(stderr, "warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n", + len, size, llama_format_win_err(GetLastError()).c_str()); + return false; + } + + // It failed but this was only the first try; increase the working + // set size and try again. + SIZE_T min_ws_size, max_ws_size; + if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) { + fprintf(stderr, "warning: GetProcessWorkingSetSize failed: %s\n", + llama_format_win_err(GetLastError()).c_str()); + return false; + } + // Per MSDN: "The maximum number of pages that a process can lock + // is equal to the number of pages in its minimum working set minus + // a small overhead." + // Hopefully a megabyte is enough overhead: + size_t increment = len + 1048576; + // The minimum must be <= the maximum, so we need to increase both: + min_ws_size += increment; + max_ws_size += increment; + if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) { + fprintf(stderr, "warning: SetProcessWorkingSetSize failed: %s\n", + llama_format_win_err(GetLastError()).c_str()); + return false; + } + } + } + + void raw_unlock(void * ptr, size_t len) { + if (!VirtualUnlock(ptr, len)) { + fprintf(stderr, "warning: failed to VirtualUnlock buffer: %s\n", + llama_format_win_err(GetLastError()).c_str()); + } + } +#else + static constexpr bool SUPPORTED = false; + + size_t lock_granularity() { + return (size_t) 65536; + } + + bool raw_lock(const void * addr, size_t len) { + fprintf(stderr, "warning: mlock not supported on this system\n"); + return false; + } + + void raw_unlock(const void * addr, size_t len) {} +#endif +}; + +// Replacement for std::vector that doesn't require zero-initialization. +struct llama_buffer { + uint8_t * addr = NULL; + size_t size = 0; + + llama_buffer() = default; + + void resize(size_t len) { +#ifdef GGML_USE_METAL + free(addr); + int result = posix_memalign((void **) &addr, getpagesize(), len); + if (result == 0) { + memset(addr, 0, len); + } + else { + addr = NULL; + } +#else + delete[] addr; + addr = new uint8_t[len]; +#endif + size = len; + } + + ~llama_buffer() { +#ifdef GGML_USE_METAL + free(addr); +#else + delete[] addr; +#endif + addr = NULL; + } + + // disable copy and move + llama_buffer(const llama_buffer&) = delete; + llama_buffer(llama_buffer&&) = delete; + llama_buffer& operator=(const llama_buffer&) = delete; + llama_buffer& operator=(llama_buffer&&) = delete; +}; + +#ifdef GGML_USE_CUBLAS +#include "ggml-cuda.h" +struct llama_ctx_buffer { + uint8_t * addr = NULL; + bool is_cuda; + size_t size = 0; + + llama_ctx_buffer() = default; + + void resize(size_t size) { + free(); + + addr = (uint8_t *) ggml_cuda_host_malloc(size); + if (addr) { + is_cuda = true; + } + else { + // fall back to pageable memory + addr = new uint8_t[size]; + is_cuda = false; + } + this->size = size; + } + + void free() { + if (addr) { + if (is_cuda) { + ggml_cuda_host_free(addr); + } + else { + delete[] addr; + } + } + addr = NULL; + } + + ~llama_ctx_buffer() { + free(); + } + + // disable copy and move + llama_ctx_buffer(const llama_ctx_buffer&) = delete; + llama_ctx_buffer(llama_ctx_buffer&&) = delete; + llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete; + llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete; +}; +#else +typedef llama_buffer llama_ctx_buffer; +#endif + +#endif diff --git a/ctransformers/models/ggml/llama.cpp b/ctransformers/models/ggml/llama.cpp new file mode 100644 index 0000000000000000000000000000000000000000..eada658c8bd8a6a2a98b1edbb8ad761b7f9c5aa0 --- /dev/null +++ b/ctransformers/models/ggml/llama.cpp @@ -0,0 +1,3734 @@ +// Defines fileno on msys: +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#include +#include +#include +#endif + +#include "ggml.h" +#include "llama-util.h" +#include "llama.h" +#ifdef GGML_USE_CUBLAS +#include "ggml-cuda.h" +#elif defined(GGML_USE_CLBLAST) +#include "ggml-opencl.h" +#endif + +#ifdef GGML_USE_METAL +#include "ggml-metal.h" +#endif +#ifdef GGML_USE_K_QUANTS +#ifndef QK_K +#define QK_K 256 +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(disable : 4244 4267) // possible loss of data +#endif + +#define LLAMA_USE_SCRATCH +#define LLAMA_MAX_SCRATCH_BUFFERS 16 + +// available llama models +enum e_model { + MODEL_UNKNOWN, + MODEL_3B, + MODEL_7B, + MODEL_13B, + MODEL_30B, + MODEL_65B, +}; + +static const size_t MB = 1024 * 1024; + +// computed for n_ctx == 2048 +// TODO: dynamically determine these sizes +// needs modifications in ggml + +typedef void (*offload_func_t)(struct ggml_tensor *tensor); + +void llama_nop(struct ggml_tensor *tensor) { // don't offload by default + (void)tensor; +} + +static const std::map &MEM_REQ_SCRATCH0() { + static std::map k_sizes = { + {MODEL_3B, 256ull * MB}, {MODEL_7B, 512ull * MB}, + {MODEL_13B, 512ull * MB}, {MODEL_30B, 512ull * MB}, + {MODEL_65B, 1024ull * MB}, + }; + return k_sizes; +} + +static const std::map &MEM_REQ_SCRATCH1() { + static std::map k_sizes = { + {MODEL_3B, 256ull * MB}, {MODEL_7B, 512ull * MB}, + {MODEL_13B, 512ull * MB}, {MODEL_30B, 512ull * MB}, + {MODEL_65B, 1024ull * MB}, + }; + return k_sizes; +} + +// 2*n_embd*n_ctx*n_layer*sizeof(float16) +static const std::map &MEM_REQ_KV_SELF() { + static std::map k_sizes = { + {MODEL_3B, 682ull * MB}, {MODEL_7B, 1026ull * MB}, + {MODEL_13B, 1608ull * MB}, {MODEL_30B, 3124ull * MB}, + {MODEL_65B, 5120ull * MB}, + }; + return k_sizes; +} + +// this is mostly needed for temporary mul_mat buffers to dequantize the data +// not actually needed if BLAS is disabled +static const std::map &MEM_REQ_EVAL() { + static std::map k_sizes = { + {MODEL_3B, 512ull * MB}, {MODEL_7B, 768ull * MB}, + {MODEL_13B, 1024ull * MB}, {MODEL_30B, 1280ull * MB}, + {MODEL_65B, 1536ull * MB}, + }; + return k_sizes; +} + +// default hparams (LLaMA 7B) +struct llama_hparams { + uint32_t n_vocab = 32000; + uint32_t n_ctx = 512; // this is provided as user input? + uint32_t n_embd = 4096; + uint32_t n_mult = 256; + uint32_t n_head = 32; + uint32_t n_layer = 32; + uint32_t n_rot = 64; + enum llama_ftype ftype = LLAMA_FTYPE_MOSTLY_F16; + + bool operator!=(const llama_hparams &other) const { + return static_cast(memcmp(this, &other, sizeof(llama_hparams))); + } +}; + +struct llama_layer { + // normalization + struct ggml_tensor *attention_norm; + + // attention + struct ggml_tensor *wq; + struct ggml_tensor *wk; + struct ggml_tensor *wv; + struct ggml_tensor *wo; + + // normalization + struct ggml_tensor *ffn_norm; + + // ff + struct ggml_tensor *w1; + struct ggml_tensor *w2; + struct ggml_tensor *w3; +}; + +struct llama_kv_cache { + struct ggml_tensor *k; + struct ggml_tensor *v; + + struct ggml_context *ctx = NULL; + + llama_ctx_buffer buf; + + int n; // number of tokens currently in the cache + + ~llama_kv_cache() { + if (ctx) { + ggml_free(ctx); + } + +#ifdef GGML_USE_CUBLAS + ggml_cuda_free_data(k); + ggml_cuda_free_data(v); +#endif // GGML_USE_CUBLAS + } +}; + +struct llama_model { + e_model type = MODEL_UNKNOWN; + + llama_hparams hparams; + + struct ggml_tensor *tok_embeddings; + + struct ggml_tensor *norm; + struct ggml_tensor *output; + + std::vector layers; + int n_gpu_layers; + + // context + struct ggml_context *ctx = NULL; + + // key + value cache for the self attention + // TODO: move to llama_state + struct llama_kv_cache kv_self; + + // the model memory buffer + llama_ctx_buffer buf; + + // model memory mapped file + std::unique_ptr mapping; + + // objects representing data potentially being locked in memory + llama_mlock mlock_buf; + llama_mlock mlock_mmap; + + // for quantize-stats only + std::vector> tensors_by_name; + + ~llama_model() { + if (ctx) { + ggml_free(ctx); + } + +#ifdef GGML_USE_CUBLAS + for (size_t i = 0; i < tensors_by_name.size(); ++i) { + ggml_cuda_free_data(tensors_by_name[i].second); + } + ggml_cuda_free_scratch(); +#elif defined(GGML_USE_CLBLAST) + for (size_t i = 0; i < tensors_by_name.size(); ++i) { + ggml_cl_free_data(tensors_by_name[i].second); + } +#endif + } +}; + +struct llama_vocab { + using id = int32_t; + using token = std::string; + + struct token_score { + token tok; + float score; + }; + + std::unordered_map token_to_id; + std::vector id_to_token; +}; + +struct llama_context { + std::mt19937 rng; + + int64_t t_load_us = 0; + int64_t t_start_us = 0; + bool has_evaluated_once = false; + + int64_t t_sample_us = 0; + int64_t t_eval_us = 0; + int64_t t_p_eval_us = 0; + + int32_t n_sample = 0; // number of tokens sampled + int32_t n_eval = 0; // number of eval calls + int32_t n_p_eval = + 0; // number of tokens in eval calls for the prompt (with batch size > 1) + + llama_model model; + llama_vocab vocab; + + size_t mem_per_token = 0; + + // decode output (2-dimensional array: [n_tokens][n_vocab]) + std::vector logits; + bool logits_all = false; + + // input embedding (1-dimensional array: [n_embd]) + std::vector embedding; + + // memory buffers used to evaluate the model + // TODO: move in llama_state + llama_ctx_buffer buf_compute; + llama_ctx_buffer buf_scratch[LLAMA_MAX_SCRATCH_BUFFERS]; + +#ifdef GGML_USE_METAL + ggml_metal_context *ctx_metal = NULL; +#endif + + int buf_last = 0; + size_t buf_max_size[LLAMA_MAX_SCRATCH_BUFFERS] = {0}; + + void use_buf(struct ggml_context *ctx, int i) { +#if defined(LLAMA_USE_SCRATCH) + size_t last_size = 0; + + if (i == -1) { + last_size = ggml_set_scratch(ctx, { + 0, + 0, + nullptr, + }); + } else { + auto &buf = buf_scratch[i]; + last_size = ggml_set_scratch(ctx, { + 0, + buf.size, + buf.addr, + }); + } + + if (buf_last >= 0) { + buf_max_size[buf_last] = std::max(buf_max_size[buf_last], last_size); + } + + buf_last = i; +#else + (void)i; + (void)ctx; +#endif + } + + size_t get_buf_max_mem(int i) const { +#if defined(LLAMA_USE_SCRATCH) + return buf_max_size[i]; +#else + (void)i; + return 0; +#endif + } +}; + +template +static T checked_mul(T a, T b) { + T ret = a * b; + if (a != 0 && ret / a != b) { + throw std::runtime_error(format("overflow multiplying %llu * %llu", + (unsigned long long)a, + (unsigned long long)b)); + } + return ret; +} + +static size_t checked_div(size_t a, size_t b) { + if (b == 0 || a % b != 0) { + throw std::runtime_error(format("error dividing %zu / %zu", a, b)); + } + return a / b; +} + +static std::string llama_format_tensor_shape(const std::vector &ne) { + char buf[256]; + snprintf(buf, sizeof(buf), "%5u", ne.at(0)); + for (size_t i = 1; i < ne.size(); i++) { + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " x %5u", ne.at(i)); + } + return buf; +} + +static size_t llama_calc_tensor_size(const std::vector &ne, + enum ggml_type type) { + size_t size = ggml_type_size(type); + for (uint32_t dim : ne) { + size = checked_mul(size, dim); + } + return size / ggml_blck_size(type); +} + +struct llama_load_tensor_shard { + std::vector ne; + size_t size; + enum ggml_type type; + size_t file_idx; + size_t file_off; + + void calc_size() { size = llama_calc_tensor_size(ne, type); } +}; + +enum llama_split_type { SPLIT_NONE, SPLIT_BY_COLUMNS, SPLIT_BY_ROWS }; + +struct llama_load_tensor { + std::vector shards; + + std::string name; + enum ggml_type type = GGML_TYPE_F32; + llama_split_type split_type = SPLIT_NONE; + std::vector ne; + size_t size; + struct ggml_tensor *ggml_tensor = NULL; + uint8_t *data; + + llama_load_tensor(const std::string &name) : name(name) {} + + void calc_all() { + calc_type(); + calc_split_type(); + calc_ne(); + calc_size(); + } + + void calc_type() { + const auto &first_shard = shards.at(0); + for (const auto &shard : shards) { + if (shard.type != first_shard.type) { + throw std::runtime_error( + format("inconsistent tensor shard type in '%s'", name.c_str())); + } + } + type = first_shard.type; + } + + void calc_split_type() { + if (shards.at(0).ne.size() == + 1 || // 1D tensors are just duplicated in every file + shards.size() == 1) { // only one file? + split_type = SPLIT_NONE; + } else if (name.find("tok_embeddings.") == 0 || + name.find(".attention.wo.weight") != std::string::npos || + name.find(".feed_forward.w2.weight") != std::string::npos) { + split_type = SPLIT_BY_COLUMNS; + } else { + split_type = SPLIT_BY_ROWS; + } + } + + void calc_ne() { + const auto &first_shard = shards.at(0); + for (const auto &shard : shards) { + if (shard.ne != first_shard.ne) { + throw std::runtime_error(format( + "inconsistent tensor shard shape in '%s': first was %s, other was " + "%s", + name.c_str(), llama_format_tensor_shape(first_shard.ne).c_str(), + llama_format_tensor_shape(shard.ne).c_str())); + } + } + ne = first_shard.ne; + LLAMA_ASSERT(shards.size() <= UINT32_MAX); + uint32_t n_shards = (uint32_t)shards.size(); + switch (split_type) { + case SPLIT_NONE: + ne = first_shard.ne; + break; + case SPLIT_BY_COLUMNS: + ne = {checked_mul(first_shard.ne[0], n_shards), + first_shard.ne[1]}; + break; + case SPLIT_BY_ROWS: + ne = {first_shard.ne[0], + checked_mul(first_shard.ne[1], n_shards)}; + break; + } + } + + void calc_size() { size = llama_calc_tensor_size(ne, type); } +}; + +struct llama_load_tensors_map { + // tensors is kept in a separate vector to preserve file order + std::vector tensors; + std::unordered_map name_to_idx; +}; + +enum llama_file_version { + LLAMA_FILE_VERSION_GGML, + LLAMA_FILE_VERSION_GGMF_V1, // added version field and scores in vocab + LLAMA_FILE_VERSION_GGJT_V1, // added padding + LLAMA_FILE_VERSION_GGJT_V2, // changed quantization format + LLAMA_FILE_VERSION_GGJT_V3, // changed Q4 and Q8 quantization format +}; + +struct llama_file_loader { + llama_file file; + llama_file_version file_version; + llama_hparams hparams; + llama_vocab vocab; + + llama_file_loader(const char *fname, size_t file_idx, + llama_load_tensors_map &tensors_map) + : file(fname, "rb") { + read_magic(); + read_hparams(); + read_vocab(); + read_tensor_metadata(file_idx, tensors_map); + } + void read_magic() { + uint32_t magic = file.read_u32(); + + if (magic == LLAMA_FILE_MAGIC_GGML) { + file_version = LLAMA_FILE_VERSION_GGML; + return; + } + + uint32_t version = file.read_u32(); + + switch (magic) { + case LLAMA_FILE_MAGIC_GGMF: + switch (version) { + case 1: + file_version = LLAMA_FILE_VERSION_GGMF_V1; + return; + } + break; + case LLAMA_FILE_MAGIC_GGJT: + switch (version) { + case 1: + file_version = LLAMA_FILE_VERSION_GGJT_V1; + return; + case 2: + file_version = LLAMA_FILE_VERSION_GGJT_V2; + return; + case 3: + file_version = LLAMA_FILE_VERSION_GGJT_V3; + return; + } + } + + throw std::runtime_error( + format("unknown (magic, version) combination: %08x, %08x; is this " + "really a GGML file?", + magic, version)); + } + void read_hparams() { + hparams.n_vocab = file.read_u32(); + hparams.n_embd = file.read_u32(); + hparams.n_mult = file.read_u32(); + hparams.n_head = file.read_u32(); + hparams.n_layer = file.read_u32(); + hparams.n_rot = file.read_u32(); + hparams.ftype = (enum llama_ftype)file.read_u32(); + } + void read_vocab() { + vocab.id_to_token.resize(hparams.n_vocab); + + for (uint32_t i = 0; i < hparams.n_vocab; i++) { + uint32_t len = file.read_u32(); + std::string word = file.read_string(len); + + float score = 0.0f; + if (file_version >= LLAMA_FILE_VERSION_GGMF_V1) { + file.read_raw(&score, sizeof(score)); + } + + vocab.token_to_id[word] = i; + + auto &tok_score = vocab.id_to_token[i]; + tok_score.tok = std::move(word); + tok_score.score = score; + } + } + void read_tensor_metadata(size_t file_idx, + llama_load_tensors_map &tensors_map) { + while (file.tell() < file.size) { + llama_load_tensor_shard shard; + uint32_t n_dims = file.read_u32(); + uint32_t name_len = file.read_u32(); + shard.type = (enum ggml_type)file.read_u32(); + shard.ne.resize(n_dims); + file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims); + std::string name = file.read_string(name_len); + if (n_dims < 1 || n_dims > 2) { + throw std::runtime_error( + format("llama.cpp: tensor '%s' should not be %u-dimensional", + name.c_str(), n_dims)); + } + switch (shard.type) { + case GGML_TYPE_F32: + case GGML_TYPE_F16: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + break; + default: { + throw std::runtime_error( + format("unrecognized tensor type %u\n", shard.type)); + } + } + + if (file_version >= LLAMA_FILE_VERSION_GGJT_V1) { + // skip to the next multiple of 32 bytes + file.seek(-static_cast(file.tell()) & 31, SEEK_CUR); + } + shard.file_idx = file_idx; + shard.file_off = file.tell(); + + shard.calc_size(); + file.seek(shard.size, SEEK_CUR); + + auto it = tensors_map.name_to_idx.find(name); + size_t idx; + if (it != tensors_map.name_to_idx.end()) { + idx = it->second; + } else { + tensors_map.tensors.emplace_back(name); + idx = tensors_map.tensors.size() - 1; + tensors_map.name_to_idx.emplace(name, idx); + } + tensors_map.tensors.at(idx).shards.push_back(shard); + } + } +}; + +struct llama_file_saver { + llama_file file; + llama_file_loader *any_file_loader; + llama_file_saver(const char *fname, llama_file_loader *any_file_loader, + enum llama_ftype new_ftype) + : file(fname, "wb"), any_file_loader(any_file_loader) { + fprintf(stderr, "llama.cpp: saving model to %s\n", fname); + write_magic(); + write_hparams(new_ftype); + write_vocab(); + } + void write_magic() { + file.write_u32(LLAMA_FILE_MAGIC); // magic + file.write_u32(LLAMA_FILE_VERSION); // version + } + void write_hparams(enum llama_ftype new_ftype) { + const llama_hparams &hparams = any_file_loader->hparams; + file.write_u32(hparams.n_vocab); + file.write_u32(hparams.n_embd); + file.write_u32(hparams.n_mult); + file.write_u32(hparams.n_head); + file.write_u32(hparams.n_layer); + file.write_u32(hparams.n_rot); + file.write_u32(new_ftype); + } + void write_vocab() { + if (any_file_loader->file_version == LLAMA_FILE_VERSION_GGML) { + fprintf(stderr, + "llama.cpp: WARNING: input is an old file that doesn't have " + "scores; will add dummy scores\n"); + } + uint32_t n_vocab = any_file_loader->hparams.n_vocab; + for (uint32_t i = 0; i < n_vocab; i++) { + const auto &token_score = any_file_loader->vocab.id_to_token.at(i); + file.write_u32((uint32_t)token_score.tok.size()); + file.write_raw(token_score.tok.data(), token_score.tok.size()); + file.write_raw(&token_score.score, sizeof(token_score.score)); + } + } + void write_tensor(llama_load_tensor &tensor, enum ggml_type new_type, + const void *new_data, size_t new_size) { + switch (new_type) { + case GGML_TYPE_F32: + case GGML_TYPE_F16: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q2_K: + case GGML_TYPE_Q3_K: + case GGML_TYPE_Q4_K: + case GGML_TYPE_Q5_K: + case GGML_TYPE_Q6_K: + break; + default: + LLAMA_ASSERT(false); + } + file.write_u32((uint32_t)tensor.ne.size()); + file.write_u32((uint32_t)tensor.name.size()); + file.write_u32(new_type); + file.write_raw(tensor.ne.data(), sizeof(tensor.ne[0]) * tensor.ne.size()); + file.write_raw(tensor.name.data(), tensor.name.size()); + file.seek(-static_cast(file.tell()) & 31, SEEK_CUR); + LLAMA_ASSERT(new_size == llama_calc_tensor_size(tensor.ne, new_type)); + file.write_raw(new_data, new_size); + } +}; + +struct llama_model_loader { + std::vector> file_loaders; + llama_load_tensors_map tensors_map; + bool use_mmap; + size_t num_ggml_tensors_created = 0; + struct ggml_context *ggml_ctx = NULL; + std::unique_ptr mapping; + + llama_model_loader(const std::string &fname_base, bool use_mmap, + bool vocab_only) { + auto *first_file = + new llama_file_loader(fname_base.c_str(), 0, tensors_map); + file_loaders.emplace_back(first_file); + uint32_t n_parts = vocab_only ? 1 : guess_n_parts(); + for (uint32_t i = 1; i < n_parts; i++) { + std::string fname = fname_base + "." + std::to_string(i); + auto *ith_file = new llama_file_loader(fname.c_str(), i, tensors_map); + file_loaders.emplace_back(ith_file); + if (ith_file->hparams != first_file->hparams) { + throw std::runtime_error( + format("llama.cpp: hparams inconsistent between files")); + } + } + if (!llama_mmap::SUPPORTED) { + use_mmap = false; + } + if (use_mmap && alignment_prevents_mmap()) { + fprintf(stderr, + "llama.cpp: can't use mmap because tensors are not aligned; " + "convert to new format to avoid this\n"); + use_mmap = false; + } + this->use_mmap = use_mmap; + for (llama_load_tensor < : tensors_map.tensors) { + lt.calc_all(); + } + } + + bool alignment_prevents_mmap() { + for (const llama_load_tensor < : tensors_map.tensors) { + for (const llama_load_tensor_shard &shard : lt.shards) { + if (shard.file_off & 3) { + return true; + } + } + } + return false; + } + + uint32_t guess_n_parts() const { + auto it = tensors_map.name_to_idx.find("tok_embeddings.weight"); + if (it == tensors_map.name_to_idx.end()) { + throw std::runtime_error(std::string("missing tok_embeddings.weight")); + } + const llama_load_tensor < = tensors_map.tensors.at(it->second); + return file_loaders.at(0)->hparams.n_embd / lt.shards.at(0).ne.at(0); + } + + void calc_sizes(size_t *ctx_size_p, size_t *mmapped_size_p) const { + *ctx_size_p = *mmapped_size_p = 0; + for (const llama_load_tensor < : tensors_map.tensors) { + *ctx_size_p += sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE; + *(use_mmap ? mmapped_size_p : ctx_size_p) += lt.size; + } + } + + struct ggml_tensor *get_tensor(const std::string &name, + const std::vector &ne, + ggml_backend backend) { + auto it = tensors_map.name_to_idx.find(name); + if (it == tensors_map.name_to_idx.end()) { + throw std::runtime_error(std::runtime_error(format( + "llama.cpp: tensor '%s' is missing from model", name.c_str()))); + } + llama_load_tensor < = tensors_map.tensors.at(it->second); + if (lt.ne != ne) { + throw std::runtime_error( + format("llama.cpp: tensor '%s' has wrong shape; expected %s, got %s", + name.c_str(), llama_format_tensor_shape(ne).c_str(), + llama_format_tensor_shape(lt.ne).c_str())); + } + + return get_tensor_for(lt, backend); + } + + struct ggml_tensor *get_tensor_for(llama_load_tensor <, + ggml_backend backend) { + struct ggml_tensor *tensor; + if (backend != GGML_BACKEND_CPU) { + ggml_set_no_alloc(ggml_ctx, true); + } + if (lt.ne.size() == 2) { + tensor = ggml_new_tensor_2d(ggml_ctx, lt.type, lt.ne.at(0), lt.ne.at(1)); + } else { + LLAMA_ASSERT(lt.ne.size() == 1); + tensor = ggml_new_tensor_1d(ggml_ctx, lt.type, lt.ne.at(0)); + } + ggml_set_name(tensor, lt.name.c_str()); + LLAMA_ASSERT( + lt.ggml_tensor == + NULL); // if this fails, we called get_tensor twice on the same tensor + + if (backend != GGML_BACKEND_CPU) { + ggml_set_no_alloc(ggml_ctx, use_mmap); + } + tensor->backend = backend; + lt.ggml_tensor = tensor; + num_ggml_tensors_created++; + return tensor; + } + + void done_getting_tensors() const { + if (num_ggml_tensors_created != tensors_map.tensors.size()) { + throw std::runtime_error( + std::string("llama.cpp: file contained more tensors than expected")); + } + } + + void load_all_data(llama_progress_callback progress_callback, + void *progress_callback_user_data, llama_mlock *lmlock) { + size_t data_size = 0; + size_t prefetch_size = 0; + size_t lock_size = 0; + for (const llama_load_tensor < : tensors_map.tensors) { + data_size += lt.size; + if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) { + prefetch_size += lt.size; + } + } + + if (use_mmap) { + mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size)); + if (lmlock) { + lmlock->init(mapping->addr); + } + } + + size_t done_size = 0; + for (llama_load_tensor < : tensors_map.tensors) { + if (progress_callback) { + progress_callback((float)done_size / data_size, + progress_callback_user_data); + } + LLAMA_ASSERT(lt.ggml_tensor); // unused tensors should have been caught + // by load_data already + lt.data = (uint8_t *)lt.ggml_tensor->data; + + // allocate temp buffer if not using mmap + if (!use_mmap && lt.data == NULL) { + GGML_ASSERT(lt.ggml_tensor->backend != GGML_BACKEND_CPU); + lt.data = (uint8_t *)malloc(ggml_nbytes(lt.ggml_tensor)); + } + + load_data_for(lt); + + switch (lt.ggml_tensor->backend) { + case GGML_BACKEND_CPU: + lt.ggml_tensor->data = lt.data; + if (use_mmap && lmlock) { + lock_size += lt.size; + lmlock->grow_to(lock_size); + } + break; +#if defined(GGML_USE_CUBLAS) + case GGML_BACKEND_GPU: + case GGML_BACKEND_GPU_SPLIT: + ggml_cuda_transform_tensor(lt.data, lt.ggml_tensor); + if (!use_mmap) { + free(lt.data); + } + break; +#elif defined(GGML_USE_CLBLAST) + case GGML_BACKEND_GPU: + ggml_cl_transform_tensor(lt.data, lt.ggml_tensor); + if (!use_mmap) { + free(lt.data); + } + break; +#endif + default: + continue; + } + + done_size += lt.size; + } + } + + void load_data_for(llama_load_tensor <) { + if (use_mmap) { + LLAMA_ASSERT(lt.shards.size() == 1); + lt.data = (uint8_t *)mapping->addr + lt.shards.at(0).file_off; + } else if (lt.split_type == SPLIT_NONE) { + llama_file &file = file_loaders.at(lt.shards.at(0).file_idx)->file; + file.seek(lt.shards.at(0).file_off, SEEK_SET); + file.read_raw(lt.data, lt.size); + } else if (lt.split_type == SPLIT_BY_ROWS) { + size_t offset = 0; + for (llama_load_tensor_shard &shard : lt.shards) { + llama_file &file = file_loaders.at(shard.file_idx)->file; + file.seek(shard.file_off, SEEK_SET); + file.read_raw(lt.data + offset, shard.size); + offset += shard.size; + } + LLAMA_ASSERT(offset == lt.size); + } else if (lt.split_type == SPLIT_BY_COLUMNS) { + // Let's load the data into temporary buffers to ensure the OS performs + // large loads. + std::vector tmp_bufs(lt.shards.size()); + for (size_t i = 0; i < lt.shards.size(); i++) { + llama_load_tensor_shard &shard = lt.shards.at(i); + llama_file &file = file_loaders.at(shard.file_idx)->file; + file.seek(shard.file_off, SEEK_SET); + tmp_bufs.at(i).resize(shard.size); + file.read_raw(tmp_bufs.at(i).addr, shard.size); + } + // Then reshape. + size_t num_rows = lt.ne.at(1); + size_t per_shard_row_size = lt.shards.at(0).size / num_rows; + size_t out_offset = 0; + for (size_t row = 0; row < num_rows; row++) { + for (llama_buffer &tmp_buf : tmp_bufs) { + memcpy(lt.data + out_offset, tmp_buf.addr + row * per_shard_row_size, + per_shard_row_size); + out_offset += per_shard_row_size; + } + } + LLAMA_ASSERT(out_offset == lt.size); + } + if (0) { + print_checksum(lt); + } + } + + static void print_checksum(llama_load_tensor <) { + uint32_t sum = 0; + for (size_t i = 0; i < lt.size; i++) { + uint8_t byte = lt.data[i]; + sum = byte + (sum << 6) + (sum << 16) - sum; // sdbm hash + } + fprintf(stderr, "%s checksum: %#08x (%s, size %zu)\n", lt.name.c_str(), sum, + llama_format_tensor_shape(lt.ne).c_str(), lt.size); + } +}; + +// +// kv cache +// + +static bool kv_cache_init(const struct llama_hparams &hparams, + struct llama_kv_cache &cache, ggml_type wtype, + int n_ctx, int n_gpu_layers) { + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + + const int64_t n_mem = n_layer * n_ctx; + const int64_t n_elements = n_embd * n_mem; + + cache.buf.resize(2u * n_elements * ggml_type_size(wtype) + 2u * MB); + cache.n = 0; + + struct ggml_init_params params; + params.mem_size = cache.buf.size; + params.mem_buffer = cache.buf.addr; + params.no_alloc = false; + + cache.ctx = ggml_init(params); + + if (!cache.ctx) { + fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__); + return false; + } + + cache.k = ggml_new_tensor_1d(cache.ctx, wtype, n_elements); + cache.v = ggml_new_tensor_1d(cache.ctx, wtype, n_elements); + ggml_set_name(cache.k, "cache_k"); + ggml_set_name(cache.v, "cache_v"); + + (void)n_gpu_layers; +#ifdef GGML_USE_CUBLAS + if (n_gpu_layers > n_layer + 1) { + ggml_cuda_assign_buffers_no_scratch(cache.v); + } + if (n_gpu_layers > n_layer + 2) { + ggml_cuda_assign_buffers_no_scratch(cache.k); + } +#endif // GGML_USE_CUBLAS + + return true; +} + +struct llama_context_params llama_context_default_params() { + struct llama_context_params result = { + /*.n_ctx =*/512, + /*.n_batch =*/512, + /*.gpu_layers =*/0, + /*.main_gpu =*/0, + /*.tensor_split =*/{0}, + /*.low_vram =*/false, + /*.seed =*/-1, + /*.f16_kv =*/true, + /*.logits_all =*/false, + /*.vocab_only =*/false, + /*.use_mmap =*/true, + /*.use_mlock =*/false, + /*.embedding =*/false, + /*.progress_callback =*/nullptr, + /*.progress_callback_user_data =*/nullptr, + }; + + return result; +} + +struct llama_model_quantize_params llama_model_quantize_default_params() { + struct llama_model_quantize_params result = { + /*.nthread =*/0, + /*.ftype =*/LLAMA_FTYPE_MOSTLY_Q5_1, + /*.allow_requantize =*/false, + /*.quantize_output_tensor =*/true, + }; + + return result; +} + +bool llama_mmap_supported() { return llama_mmap::SUPPORTED; } + +bool llama_mlock_supported() { return llama_mlock::SUPPORTED; } + +void llama_init_backend() { + ggml_time_init(); + + // needed to initialize f16 tables + { + struct ggml_init_params params = {0, NULL, false}; + struct ggml_context *ctx = ggml_init(params); + ggml_free(ctx); + } +} + +int64_t llama_time_us() { return ggml_time_us(); } + +// +// model loading +// + +static const char *llama_file_version_name(llama_file_version version) { + switch (version) { + case LLAMA_FILE_VERSION_GGML: + return "'ggml' (old version with low tokenizer quality and no mmap " + "support)"; + case LLAMA_FILE_VERSION_GGMF_V1: + return "ggmf v1 (old version with no mmap support)"; + case LLAMA_FILE_VERSION_GGJT_V1: + return "ggjt v1 (pre #1405)"; + case LLAMA_FILE_VERSION_GGJT_V2: + return "ggjt v2 (pre #1508)"; + case LLAMA_FILE_VERSION_GGJT_V3: + return "ggjt v3 (latest)"; + } + + return "unknown"; +} + +static const char *llama_ftype_name(enum llama_ftype ftype) { + switch (ftype) { + case LLAMA_FTYPE_ALL_F32: + return "all F32"; + case LLAMA_FTYPE_MOSTLY_F16: + return "mostly F16"; + case LLAMA_FTYPE_MOSTLY_Q4_0: + return "mostly Q4_0"; + case LLAMA_FTYPE_MOSTLY_Q4_1: + return "mostly Q4_1"; + case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: + return "mostly Q4_1, some F16"; + case LLAMA_FTYPE_MOSTLY_Q5_0: + return "mostly Q5_0"; + case LLAMA_FTYPE_MOSTLY_Q5_1: + return "mostly Q5_1"; + case LLAMA_FTYPE_MOSTLY_Q8_0: + return "mostly Q8_0"; + // K-quants + case LLAMA_FTYPE_MOSTLY_Q2_K: + return "mostly Q2_K"; + case LLAMA_FTYPE_MOSTLY_Q3_K_S: + return "mostly Q3_K - Small"; + case LLAMA_FTYPE_MOSTLY_Q3_K_M: + return "mostly Q3_K - Medium"; + case LLAMA_FTYPE_MOSTLY_Q3_K_L: + return "mostly Q3_K - Large"; + case LLAMA_FTYPE_MOSTLY_Q4_K_S: + return "mostly Q4_K - Small"; + case LLAMA_FTYPE_MOSTLY_Q4_K_M: + return "mostly Q4_K - Medium"; + case LLAMA_FTYPE_MOSTLY_Q5_K_S: + return "mostly Q5_K - Small"; + case LLAMA_FTYPE_MOSTLY_Q5_K_M: + return "mostly Q5_K - Medium"; + case LLAMA_FTYPE_MOSTLY_Q6_K: + return "mostly Q6_K"; + default: + return "unknown, may not work"; + } +} + +static const char *llama_model_type_name(e_model type) { + switch (type) { + case MODEL_3B: + return "3B"; + case MODEL_7B: + return "7B"; + case MODEL_13B: + return "13B"; + case MODEL_30B: + return "30B"; + case MODEL_65B: + return "65B"; + default: + LLAMA_ASSERT(false); + } +} + +static void llama_model_load_internal( + const std::string &fname, llama_context &lctx, int n_ctx, int n_batch, + int n_gpu_layers, int main_gpu, const float *tensor_split, bool low_vram, + ggml_type memory_type, bool use_mmap, bool use_mlock, bool vocab_only, + llama_progress_callback progress_callback, + void *progress_callback_user_data) { + lctx.t_start_us = ggml_time_us(); + + std::unique_ptr ml( + new llama_model_loader(fname, use_mmap, vocab_only)); + + lctx.vocab = std::move(ml->file_loaders.at(0)->vocab); + auto &model = lctx.model; + model.hparams = ml->file_loaders.at(0)->hparams; + model.n_gpu_layers = n_gpu_layers; + llama_file_version file_version = ml->file_loaders.at(0)->file_version; + auto &hparams = model.hparams; + + { + switch (hparams.n_layer) { + case 26: + model.type = e_model::MODEL_3B; + break; + case 32: + model.type = e_model::MODEL_7B; + break; + case 40: + model.type = e_model::MODEL_13B; + break; + case 60: + model.type = e_model::MODEL_30B; + break; + case 80: + model.type = e_model::MODEL_65B; + break; + default: { + if (hparams.n_layer < 32) { + model.type = e_model::MODEL_7B; + } + } break; + } + + hparams.n_ctx = n_ctx; + } + + const uint32_t n_ff = + ((2 * (4 * hparams.n_embd) / 3 + hparams.n_mult - 1) / hparams.n_mult) * + hparams.n_mult; + + if (file_version < LLAMA_FILE_VERSION_GGJT_V2) { + if (hparams.ftype != LLAMA_FTYPE_ALL_F32 && + hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 && + hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) { + throw std::runtime_error( + format("this format is no longer supported (see " + "https://github.com/ggerganov/llama.cpp/pull/1405)")); + } + } + + if (file_version < LLAMA_FILE_VERSION_GGJT_V3) { + if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 || + hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 || + hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) { + throw std::runtime_error( + format("this format is no longer supported (see " + "https://github.com/ggerganov/llama.cpp/pull/1508)")); + } + } + + if (vocab_only) { + return; + } + + auto &ctx = model.ctx; + + size_t ctx_size; + size_t mmapped_size; + ml->calc_sizes(&ctx_size, &mmapped_size); + + // create the ggml context + { + lctx.model.buf.resize(ctx_size); + if (use_mlock) { + lctx.model.mlock_buf.init(lctx.model.buf.addr); + lctx.model.mlock_buf.grow_to(lctx.model.buf.size); + } + + struct ggml_init_params params = { + /*.mem_size =*/lctx.model.buf.size, + /*.mem_buffer =*/lctx.model.buf.addr, + /*.no_alloc =*/ml->use_mmap, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + throw std::runtime_error(format("ggml_init() failed")); + } + } + + (void)main_gpu; +#if defined(GGML_USE_CUBLAS) + ggml_cuda_set_main_device(main_gpu); +#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU +#define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU_SPLIT +#elif defined(GGML_USE_CLBLAST) + fprintf(stderr, "%s: using OpenCL for GPU acceleration\n", __func__); +#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU +#define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU +#else +#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CPU +#define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_CPU +#endif + + // prepare memory for the weights + size_t vram_weights = 0; + size_t vram_scratch = 0; + { + const uint32_t n_embd = hparams.n_embd; + const uint32_t n_layer = hparams.n_layer; + const uint32_t n_vocab = hparams.n_vocab; + + ml->ggml_ctx = ctx; + + model.tok_embeddings = ml->get_tensor("tok_embeddings.weight", + {n_embd, n_vocab}, GGML_BACKEND_CPU); + + // "output" tensor + { + ggml_backend backend_norm; + ggml_backend backend_output; + if (n_gpu_layers > int(n_layer)) { // NOLINT + // norm is not performance relevant on its own but keeping it in VRAM + // reduces data copying on Windows however this is detrimental unless + // everything is on the GPU +#ifndef _WIN32 + backend_norm = low_vram ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD; +#else + backend_norm = low_vram || n_gpu_layers <= (int)n_layer + 2 + ? GGML_BACKEND_CPU + : LLAMA_BACKEND_OFFLOAD; +#endif // _WIN32 + + backend_output = LLAMA_BACKEND_OFFLOAD_SPLIT; + } else { + backend_norm = GGML_BACKEND_CPU; + backend_output = GGML_BACKEND_CPU; + } + + model.norm = ml->get_tensor("norm.weight", {n_embd}, backend_norm); + model.output = + ml->get_tensor("output.weight", {n_embd, n_vocab}, backend_output); + if (backend_norm == GGML_BACKEND_GPU) { + vram_weights += ggml_nbytes(model.norm); + } + if (backend_output == GGML_BACKEND_GPU_SPLIT) { + vram_weights += ggml_nbytes(model.output); + } + } + + const int i_gpu_start = n_layer - n_gpu_layers; + + model.layers.resize(n_layer); + for (uint32_t i = 0; i < n_layer; ++i) { + const ggml_backend backend = int(i) < i_gpu_start + ? GGML_BACKEND_CPU + : LLAMA_BACKEND_OFFLOAD; // NOLINT + const ggml_backend backend_split = + int(i) < i_gpu_start ? GGML_BACKEND_CPU + : LLAMA_BACKEND_OFFLOAD_SPLIT; // NOLINT + + auto &layer = model.layers[i]; + + std::string layers_i = "layers." + std::to_string(i); + + layer.attention_norm = ml->get_tensor(layers_i + ".attention_norm.weight", + {n_embd}, backend); + + layer.wq = ml->get_tensor(layers_i + ".attention.wq.weight", + {n_embd, n_embd}, backend_split); + layer.wk = ml->get_tensor(layers_i + ".attention.wk.weight", + {n_embd, n_embd}, backend_split); + layer.wv = ml->get_tensor(layers_i + ".attention.wv.weight", + {n_embd, n_embd}, backend_split); + layer.wo = ml->get_tensor(layers_i + ".attention.wo.weight", + {n_embd, n_embd}, backend_split); + + layer.ffn_norm = + ml->get_tensor(layers_i + ".ffn_norm.weight", {n_embd}, backend); + + layer.w1 = ml->get_tensor(layers_i + ".feed_forward.w1.weight", + {n_embd, n_ff}, backend_split); + layer.w2 = ml->get_tensor(layers_i + ".feed_forward.w2.weight", + {n_ff, n_embd}, backend_split); + layer.w3 = ml->get_tensor(layers_i + ".feed_forward.w3.weight", + {n_embd, n_ff}, backend_split); + + if (backend == GGML_BACKEND_GPU) { + vram_weights += ggml_nbytes(layer.attention_norm) + + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) + + ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) + + ggml_nbytes(layer.ffn_norm) + ggml_nbytes(layer.w1) + + ggml_nbytes(layer.w2) + ggml_nbytes(layer.w3); + } + } + } + + ml->done_getting_tensors(); + + // print memory requirements + { + const size_t scale = memory_type == GGML_TYPE_F32 ? 2 : 1; + + // this is the total memory required to run the inference + const size_t mem_required = ctx_size + mmapped_size - + vram_weights + // weights in VRAM not in memory + MEM_REQ_SCRATCH0().at(model.type) + + MEM_REQ_SCRATCH1().at(model.type) + + MEM_REQ_EVAL().at(model.type); + + // this is the memory required by one llama_state + const size_t mem_required_state = scale * MEM_REQ_KV_SELF().at(model.type); + + (void)vram_scratch; + (void)n_batch; +#ifdef GGML_USE_CUBLAS + if (low_vram) { + fprintf( + stderr, + "%s: not allocating a VRAM scratch buffer due to low VRAM option\n", + __func__); + ggml_cuda_set_scratch_size(0); // disable scratch + } else { + vram_scratch = n_batch * MB; + ggml_cuda_set_scratch_size(vram_scratch); + } +#endif // GGML_USE_CUBLAS +#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) + const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer)); + size_t vram_kv_cache = 0; + if (n_gpu_layers > (int)hparams.n_layer + 1) { + if (low_vram) { + fprintf(stderr, + "%s: cannot offload v cache to GPU due to low VRAM option\n", + __func__); + } else { + fprintf(stderr, "%s: offloading v cache to GPU\n", __func__); + vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2; + } + } + if (n_gpu_layers > (int)hparams.n_layer + 2) { + if (low_vram) { + fprintf(stderr, + "%s: cannot offload k cache to GPU due to low VRAM option\n", + __func__); + } else { + fprintf(stderr, "%s: offloading k cache to GPU\n", __func__); + vram_kv_cache += MEM_REQ_KV_SELF().at(model.type) / 2; + } + } + const int max_offloadable_layers = + low_vram ? hparams.n_layer + 1 : hparams.n_layer + 3; +#else + (void)n_gpu_layers; +#endif + } + + // populate `tensors_by_name` + for (llama_load_tensor < : ml->tensors_map.tensors) { + model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor); + } + + (void)tensor_split; +#if defined(GGML_USE_CUBLAS) + { ggml_cuda_set_tensor_split(tensor_split); } +#endif + + ml->load_all_data(progress_callback, progress_callback_user_data, + use_mlock ? &lctx.model.mlock_mmap : NULL); + + if (progress_callback) { + progress_callback(1.0f, progress_callback_user_data); + } + + model.mapping = std::move(ml->mapping); + + // loading time will be recalculate after the first eval, so + // we take page faults deferred by mmap() into consideration + lctx.t_load_us = ggml_time_us() - lctx.t_start_us; +} + +static bool llama_model_load(const std::string &fname, llama_context &lctx, + int n_ctx, int n_batch, int n_gpu_layers, + int main_gpu, float *tensor_split, bool low_vram, + ggml_type memory_type, bool use_mmap, + bool use_mlock, bool vocab_only, + llama_progress_callback progress_callback, + void *progress_callback_user_data) { + try { + llama_model_load_internal(fname, lctx, n_ctx, n_batch, n_gpu_layers, + main_gpu, tensor_split, low_vram, memory_type, + use_mmap, use_mlock, vocab_only, + progress_callback, progress_callback_user_data); + return true; + } catch (const std::exception &err) { + fprintf(stderr, "error loading model: %s\n", err.what()); + return false; + } +} + +// evaluate the transformer +// +// - lctx: llama context +// - tokens: new batch of tokens to process +// - n_past: the context size so far +// - n_threads: number of threads to use +// - cgraph_fname: filename of the exported computation graph +// +static bool llama_eval_internal(llama_context &lctx, const llama_token *tokens, + const int n_tokens, const int n_past, + const int n_threads, const char *cgraph_fname) { + // enforce that the first token is BOS + if (n_past == 0 && tokens[0] != llama_token_bos()) { + fprintf(stderr, "%s: first token must be BOS\n", __func__); + return false; + } + + const int64_t t_start_us = ggml_time_us(); + + const int N = n_tokens; + + const auto &model = lctx.model; + const auto &hparams = model.hparams; + + const auto &kv_self = model.kv_self; + + LLAMA_ASSERT(!!kv_self.ctx); + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_head = hparams.n_head; + const int n_vocab = hparams.n_vocab; + const int n_rot = hparams.n_embd / hparams.n_head; + const int n_gpu_layers = model.n_gpu_layers; + + auto &mem_per_token = lctx.mem_per_token; + auto &buf_compute = lctx.buf_compute; + + struct ggml_init_params params = { + /*.mem_size =*/buf_compute.size, + /*.mem_buffer =*/buf_compute.addr, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + + // for big prompts, if BLAS is enabled, it is better to use only one thread + // otherwise, the threads are spin-lock waiting for the BLAS calls and are + // degrading the performance + ggml_cgraph gf = {}; + gf.n_threads = + N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + ggml_set_name(embd, "embd"); + memcpy(embd->data, tokens, N * ggml_element_size(embd)); + + struct ggml_tensor *cur; + struct ggml_tensor *inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd); + + const int i_gpu_start = n_layer - n_gpu_layers; + (void)i_gpu_start; + + // offload functions set the tensor output backend to GPU + // tensors are GPU-accelerated if any input or the output has been offloaded + // + // with the low VRAM option VRAM scratch is disabled in + // llama_load_model_internal in that case ggml_cuda_assign_buffers has no + // effect + offload_func_t offload_func_nr = llama_nop; // nr = non-repeating + offload_func_t offload_func_kq = llama_nop; + offload_func_t offload_func_v = llama_nop; + +#ifdef GGML_USE_CUBLAS + if (n_gpu_layers > n_layer) { + offload_func_nr = ggml_cuda_assign_buffers; + } + if (n_gpu_layers > n_layer + 1) { + offload_func_v = ggml_cuda_assign_buffers; + } + if (n_gpu_layers > n_layer + 2) { + offload_func_kq = ggml_cuda_assign_buffers; + } +#endif // GGML_USE_CUBLAS + + for (int il = 0; il < n_layer; ++il) { + offload_func_t offload_func = llama_nop; + +#ifdef GGML_USE_CUBLAS + if (il >= i_gpu_start) { + offload_func = ggml_cuda_assign_buffers; + } +#endif // GGML_USE_CUBLAS + + struct ggml_tensor *inpSA = inpL; + + lctx.use_buf(ctx0, 0); + + // norm + { + cur = ggml_rms_norm(ctx0, inpL); + offload_func(cur); + ggml_set_name(cur, "rms_norm_0"); + + // cur = cur*attention_norm(broadcasted) + cur = ggml_mul(ctx0, cur, model.layers[il].attention_norm); + offload_func(cur); + ggml_set_name(cur, "attention_norm_0"); + } + + // self-attention + { + // compute Q and K and RoPE them + struct ggml_tensor *tmpk = ggml_mul_mat(ctx0, model.layers[il].wk, cur); + offload_func_kq(tmpk); + ggml_set_name(tmpk, "tmpk"); + + struct ggml_tensor *tmpq = ggml_mul_mat(ctx0, model.layers[il].wq, cur); + offload_func_kq(tmpq); + ggml_set_name(tmpq, "tmpq"); + + struct ggml_tensor *Kcur = ggml_rope_inplace( + ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd / n_head, n_head, N), n_past, + n_rot, 0); + offload_func_kq(Kcur); + ggml_set_name(Kcur, "Kcur"); + + struct ggml_tensor *Qcur = ggml_rope_inplace( + ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd / n_head, n_head, N), n_past, + n_rot, 0); + offload_func_kq(Qcur); + ggml_set_name(Qcur, "Qcur"); + + // store key and value to memory + { + // compute the transposed [N, n_embd] V matrix + + struct ggml_tensor *tmpv = ggml_mul_mat(ctx0, model.layers[il].wv, cur); + offload_func_v(tmpv); + ggml_set_name(tmpv, "tmpv"); + + struct ggml_tensor *Vcur = + ggml_transpose(ctx0, ggml_reshape_2d(ctx0, tmpv, n_embd, N)); + offload_func_v(Vcur); + ggml_set_name(Vcur, "Vcur"); + + struct ggml_tensor *k = ggml_view_1d( + ctx0, kv_self.k, N * n_embd, + (ggml_element_size(kv_self.k) * n_embd) * (il * n_ctx + n_past)); + offload_func_kq(k); + ggml_set_name(k, "k"); + + struct ggml_tensor *v = ggml_view_2d( + ctx0, kv_self.v, N, n_embd, (n_ctx)*ggml_element_size(kv_self.v), + (il * n_ctx) * ggml_element_size(kv_self.v) * n_embd + + n_past * ggml_element_size(kv_self.v)); + offload_func_v(v); + ggml_set_name(v, "v"); + + // important: storing RoPE-ed version of K in the KV cache! + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + struct ggml_tensor *Q = ggml_permute(ctx0, Qcur, 0, 2, 1, 3); + offload_func_kq(Q); + ggml_set_name(Q, "Q"); + + struct ggml_tensor *K = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d(ctx0, kv_self.k, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(kv_self.k) * n_embd), + n_embd / n_head, n_head, n_past + N), + 0, 2, 1, 3); + offload_func_kq(K); + ggml_set_name(K, "K"); + + // K * Q + struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q); + offload_func_kq(KQ); + ggml_set_name(KQ, "KQ"); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + struct ggml_tensor *KQ_scale = + ggml_new_f32(ctx0, 1.0f / sqrtf(float(n_embd) / n_head)); + ggml_set_name(KQ_scale, "1/sqrt(n_embd/n_head)"); + + // KQ_scaled shape [n_past + N, N, n_head, 1] + struct ggml_tensor *KQ_scaled = ggml_scale_inplace(ctx0, KQ, KQ_scale); + offload_func_kq(KQ_scaled); + ggml_set_name(KQ_scaled, "KQ_scaled"); + + // KQ_masked = mask_past(KQ_scaled) + struct ggml_tensor *KQ_masked = + ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); + offload_func_kq(KQ_masked); + ggml_set_name(KQ_masked, "KQ_masked"); + + // KQ = soft_max(KQ_masked) + struct ggml_tensor *KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); + offload_func_v(KQ_soft_max); + ggml_set_name(KQ_soft_max, "KQ_soft_max"); + + // split cached V into n_head heads + struct ggml_tensor *V = + ggml_view_3d(ctx0, kv_self.v, n_past + N, n_embd / n_head, n_head, + n_ctx * ggml_element_size(kv_self.v), + n_ctx * ggml_element_size(kv_self.v) * n_embd / n_head, + il * n_ctx * ggml_element_size(kv_self.v) * n_embd); + offload_func_v(V); + ggml_set_name(V, "V"); + +#if 1 + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); + offload_func_v(KQV); + ggml_set_name(KQV, "KQV"); +#else + // make V contiguous in memory to speed up the matmul, however we waste + // time on the copy on M1 this is faster for the perplexity computation, + // but ~5% slower for the single-token generation is there a better way? + struct ggml_tensor *V_cont = + ggml_cpy(ctx0, V, + ggml_new_tensor_3d(ctx0, kv_self.v->type, n_past + N, + n_embd / n_head, n_head)); + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V_cont, KQ_soft_max); +#endif + + // KQV_merged = KQV.permute(0, 2, 1, 3) + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + offload_func_v(KQV_merged); + ggml_set_name(KQV_merged, "KQV_merged"); + + // cur = KQV_merged.contiguous().view(n_embd, N) + cur = ggml_cpy(ctx0, KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + offload_func_v(cur); + ggml_set_name(cur, "KQV_merged_contiguous"); + + // projection (no bias) + cur = ggml_mul_mat(ctx0, model.layers[il].wo, cur); + offload_func(cur); + ggml_set_name(cur, "result_wo"); + } + + lctx.use_buf(ctx0, 1); + + struct ggml_tensor *inpFF = ggml_add(ctx0, cur, inpSA); + offload_func(inpFF); + ggml_set_name(inpFF, "inpFF"); + + // feed-forward network + { + // norm + { + cur = ggml_rms_norm(ctx0, inpFF); + offload_func(cur); + ggml_set_name(cur, "rms_norm_1"); + + // cur = cur*ffn_norm(broadcasted) + cur = ggml_mul(ctx0, cur, model.layers[il].ffn_norm); + offload_func(cur); + ggml_set_name(cur, "ffn_norm"); + } + + struct ggml_tensor *tmp = ggml_mul_mat(ctx0, model.layers[il].w3, cur); + offload_func(tmp); + ggml_set_name(tmp, "result_w3"); + + cur = ggml_mul_mat(ctx0, model.layers[il].w1, cur); + offload_func(cur); + ggml_set_name(cur, "result_w2"); + + // SILU activation + cur = ggml_silu(ctx0, cur); + offload_func(cur); + ggml_set_name(cur, "silu"); + + cur = ggml_mul(ctx0, cur, tmp); + offload_func(cur); + ggml_set_name(cur, "silu_x_result_w3"); + + cur = ggml_mul_mat(ctx0, model.layers[il].w2, cur); + offload_func(cur); + ggml_set_name(cur, "result_w2"); + } + + cur = ggml_add(ctx0, cur, inpFF); + offload_func(cur); + ggml_set_name(cur, "inpFF_+_result_w2"); + + // input for next layer + inpL = cur; + } + + lctx.use_buf(ctx0, 0); + + // used at the end to optionally extract the embeddings + struct ggml_tensor *embeddings = NULL; + + // norm + { + cur = ggml_rms_norm(ctx0, inpL); + offload_func_nr(cur); + ggml_set_name(cur, "rms_norm_2"); + + // cur = cur*norm(broadcasted) + cur = ggml_mul(ctx0, cur, model.norm); + // offload_func_nr(cur); // TODO CPU + GPU mirrored backend + ggml_set_name(cur, "result_norm"); + + embeddings = cur; + } + + // lm_head + cur = ggml_mul_mat(ctx0, model.output, cur); + ggml_set_name(cur, "result_output"); + + lctx.use_buf(ctx0, -1); + + // logits -> probs + // cur = ggml_soft_max_inplace(ctx0, cur); + + // run the computation + ggml_build_forward_expand(&gf, cur); + +#ifdef GGML_USE_METAL + if (lctx.ctx_metal && N == 1) { + ggml_metal_graph_compute(lctx.ctx_metal, &gf); + ggml_metal_get_tensor(lctx.ctx_metal, cur); + } else { + // IMPORTANT: + // Since we don't have efficient Matrix x Matrix Metal multiplication yet, + // we fallback to vanilla ggml_graph_compute(). It uses Apple's Accelerate + // CBLAS API which takes advantage of the ANE or the AMX coprocessor. + // + // When we implement Matrix x Matrix Metal multiplication, we can avoid this + // branch. But for now, we have focused only on Matrix x Vector Metal + // multiplication. + // + // TODO: avoid these syncs via shared memory (ref #1696) + // + if (lctx.ctx_metal) { + // We need to sync the GPU KV cache with the CPU KV cache + ggml_metal_get_tensor(lctx.ctx_metal, kv_self.k); + ggml_metal_get_tensor(lctx.ctx_metal, kv_self.v); + } + + ggml_graph_compute(ctx0, &gf); + } +#else + ggml_graph_compute(ctx0, &gf); +#endif + + if (cgraph_fname) { + ggml_graph_export(&gf, cgraph_fname); + } + +#ifdef GGML_PERF + // print timing information per ggml operation (for debugging purposes) + // requires GGML_PERF to be defined + ggml_graph_print(&gf); +#endif + + // plot the computation graph in dot format (for debugging purposes) + // if (n_past%100 == 0) { + // ggml_graph_dump_dot(&gf, NULL, "llama.dot"); + //} + + // embd_w.resize(n_vocab*N); + // memcpy(embd_w.data(), ggml_get_data(cur), sizeof(float)*n_vocab*N); + + // update kv token count + lctx.model.kv_self.n = n_past + N; + + // extract logits + { + auto &logits_out = lctx.logits; + + if (lctx.logits_all) { + logits_out.resize(n_vocab * N); + memcpy(logits_out.data(), (float *)ggml_get_data(cur), + sizeof(float) * n_vocab * N); + } else { + // return result for just the last token + logits_out.resize(n_vocab); + memcpy(logits_out.data(), + (float *)ggml_get_data(cur) + (n_vocab * (N - 1)), + sizeof(float) * n_vocab); + } + } + + // extract embeddings + if (!lctx.embedding.empty()) { + auto &embedding_out = lctx.embedding; + + embedding_out.resize(n_embd); + memcpy(embedding_out.data(), + (float *)ggml_get_data(embeddings) + (n_embd * (N - 1)), + sizeof(float) * n_embd); + } + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0) / N; + } + +#if 0 + printf("\n%s: used_mem = %.3f MB, scratch -- %.3f MB %.3f MB\n", __func__, + ggml_used_mem(ctx0)/1024.0/1024.0, + lctx.get_buf_max_mem(0)/1024.0/1024.0, + lctx.get_buf_max_mem(1)/1024.0/1024.0); +#endif + + ggml_free(ctx0); + + // measure the performance only for the single-token evals + if (N == 1) { + lctx.t_eval_us += ggml_time_us() - t_start_us; + lctx.n_eval++; + } else if (N > 1) { + lctx.t_p_eval_us += ggml_time_us() - t_start_us; + lctx.n_p_eval += N; + } + + return true; +} + +// +// tokenizer +// + +static size_t utf8_len(char src) { + const size_t lookup[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4}; + uint8_t highbits = static_cast(src) >> 4; + return lookup[highbits]; +} + +struct llama_sp_symbol { + using index = int; + index prev; + index next; + const char *text; + size_t n; +}; + +static_assert(std::is_trivially_copyable::value, + "llama_sp_symbol is not trivially copyable"); + +struct llama_sp_bigram { + struct comparator { + bool operator()(llama_sp_bigram &l, llama_sp_bigram &r) { + return (l.score < r.score) || (l.score == r.score && l.left > r.left); + } + }; + using queue_storage = std::vector; + using queue = std::priority_queue; + llama_sp_symbol::index left; + llama_sp_symbol::index right; + float score; + size_t size; +}; + +// original implementation: +// https://github.com/ggerganov/llama.cpp/commit/074bea2eb1f1349a0118239c4152914aecaa1be4 +struct llama_tokenizer { + llama_tokenizer(const llama_vocab &vocab) : vocab_(vocab) {} + + void tokenize(const std::string &text, std::vector &output) { + // split string into utf8 chars + int index = 0; + size_t offs = 0; + while (offs < text.size()) { + llama_sp_symbol sym; + size_t char_len = std::min(text.size() - offs, utf8_len(text[offs])); + sym.text = text.c_str() + offs; + sym.n = char_len; + offs += char_len; + sym.prev = index - 1; + sym.next = offs == text.size() ? -1 : index + 1; + index++; + symbols_.emplace_back(sym); + } + + // seed the work queue with all possible 2-character tokens. + for (size_t i = 1; i < symbols_.size(); ++i) { + try_add_bigram(i - 1, i); + } + + // keep substituting the highest frequency pairs for as long as we can. + while (!work_queue_.empty()) { + auto bigram = work_queue_.top(); + work_queue_.pop(); + + auto &left_sym = symbols_[bigram.left]; + auto &right_sym = symbols_[bigram.right]; + + // if one of the symbols already got merged, skip it. + if (left_sym.n == 0 || right_sym.n == 0 || + left_sym.n + right_sym.n != bigram.size) { + continue; + } + + // merge the right sym into the left one + left_sym.n += right_sym.n; + right_sym.n = 0; + + // printf("left = '%*s' size = %zu\n", (int) left_sym.n, left_sym.text, + // bigram.size); + + // remove the right sym from the chain + left_sym.next = right_sym.next; + if (right_sym.next >= 0) { + symbols_[right_sym.next].prev = bigram.left; + } + + // find more substitutions + try_add_bigram(left_sym.prev, bigram.left); + try_add_bigram(bigram.left, left_sym.next); + } + + for (int i = 0; i != -1; i = symbols_[i].next) { + auto &symbol = symbols_[i]; + auto token = vocab_.token_to_id.find(std::string(symbol.text, symbol.n)); + + if (token == vocab_.token_to_id.end()) { + // output any symbols that did not form tokens as bytes. + for (int j = 0; j < (int)symbol.n; ++j) { + llama_vocab::id token_id = static_cast(symbol.text[j]) + 3; + output.push_back(token_id); + } + } else { + output.push_back((*token).second); + } + } + } + + private: + void try_add_bigram(int left, int right) { + if (left == -1 || right == -1) { + return; + } + + const std::string text = + std::string(symbols_[left].text, symbols_[left].n + symbols_[right].n); + auto token = vocab_.token_to_id.find(text); + + if (token == vocab_.token_to_id.end()) { + return; + } + + if (static_cast((*token).second) >= vocab_.id_to_token.size()) { + return; + } + + const auto &tok_score = vocab_.id_to_token[(*token).second]; + + llama_sp_bigram bigram; + bigram.left = left; + bigram.right = right; + bigram.score = tok_score.score; + bigram.size = text.size(); + work_queue_.push(bigram); + } + + const llama_vocab &vocab_; + std::vector symbols_; + llama_sp_bigram::queue work_queue_; +}; + +static std::vector llama_tokenize(const llama_vocab &vocab, + const std::string &text, + bool bos) { + llama_tokenizer tokenizer(vocab); + std::vector output; + + if (text.empty()) { + return output; + } + + if (bos) { + output.push_back(llama_token_bos()); + } + + tokenizer.tokenize(text, output); + return output; +} + +// +// sampling +// + +void llama_sample_softmax(struct llama_context *ctx, + llama_token_data_array *candidates) { + assert(candidates->size > 0); + + const int64_t t_start_sample_us = ggml_time_us(); + + // Sort the logits in descending order + if (!candidates->sorted) { + std::sort(candidates->data, candidates->data + candidates->size, + [](const llama_token_data &a, const llama_token_data &b) { + return a.logit > b.logit; + }); + candidates->sorted = true; + } + + float max_l = candidates->data[0].logit; + float cum_sum = 0.0f; + for (size_t i = 0; i < candidates->size; ++i) { + float p = expf(candidates->data[i].logit - max_l); + candidates->data[i].p = p; + cum_sum += p; + } + for (size_t i = 0; i < candidates->size; ++i) { + candidates->data[i].p /= cum_sum; + } + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void llama_sample_top_k(struct llama_context *ctx, + llama_token_data_array *candidates, int k, + size_t min_keep) { + const int64_t t_start_sample_us = ggml_time_us(); + + k = std::max(k, (int)min_keep); + k = std::min(k, (int)candidates->size); + + // Sort scores in descending order + if (!candidates->sorted) { + auto comp = [](const llama_token_data &a, const llama_token_data &b) { + return a.logit > b.logit; + }; + if (k == (int)candidates->size) { + std::sort(candidates->data, candidates->data + candidates->size, comp); + } else { + std::partial_sort(candidates->data, candidates->data + k, + candidates->data + candidates->size, comp); + } + candidates->sorted = true; + } + candidates->size = k; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void llama_sample_top_p(struct llama_context *ctx, + llama_token_data_array *candidates, float p, + size_t min_keep) { + if (p >= 1.0f) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + llama_sample_softmax(ctx, candidates); + + // Compute the cumulative probabilities + float cum_sum = 0.0f; + size_t last_idx = candidates->size; + + for (size_t i = 0; i < candidates->size; ++i) { + cum_sum += candidates->data[i].p; + + // Check if the running sum is greater than p or if we have kept at least + // min_keep tokens + if (cum_sum > p && i >= min_keep) { + last_idx = i; + break; + } + } + + // Resize the output vector to keep only the top-p tokens + candidates->size = last_idx; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void llama_sample_tail_free(struct llama_context *ctx, + llama_token_data_array *candidates, float z, + size_t min_keep) { + if (z >= 1.0f || candidates->size <= 2) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + llama_sample_softmax(nullptr, candidates); + + // Compute the first and second derivatives + std::vector first_derivatives(candidates->size - 1); + std::vector second_derivatives(candidates->size - 2); + + for (size_t i = 0; i < first_derivatives.size(); ++i) { + first_derivatives[i] = candidates->data[i].p - candidates->data[i + 1].p; + } + for (size_t i = 0; i < second_derivatives.size(); ++i) { + second_derivatives[i] = first_derivatives[i] - first_derivatives[i + 1]; + } + + // Calculate absolute value of second derivatives + for (size_t i = 0; i < second_derivatives.size(); ++i) { + second_derivatives[i] = abs(second_derivatives[i]); + } + + // Normalize the second derivatives + float second_derivatives_sum = std::accumulate( + second_derivatives.begin(), second_derivatives.end(), 0.0f); + for (float &value : second_derivatives) { + value /= second_derivatives_sum; + } + + float cum_sum = 0.0f; + size_t last_idx = candidates->size; + for (size_t i = 0; i < second_derivatives.size(); ++i) { + cum_sum += second_derivatives[i]; + + // Check if the running sum is greater than z or if we have kept at least + // min_keep tokens + if (cum_sum > z && i >= min_keep) { + last_idx = i; + break; + } + } + + // Resize the output vector to keep only the tokens above the tail location + candidates->size = last_idx; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void llama_sample_typical(struct llama_context *ctx, + llama_token_data_array *candidates, float p, + size_t min_keep) { + // Reference implementation: + // https://github.com/huggingface/transformers/compare/main...cimeister:typical-sampling:typical-pr + if (p >= 1.0f) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + // Compute the softmax of logits and calculate entropy + llama_sample_softmax(nullptr, candidates); + + float entropy = 0.0f; + for (size_t i = 0; i < candidates->size; ++i) { + entropy += -candidates->data[i].p * logf(candidates->data[i].p); + } + + // Compute the absolute difference between negative log probability and + // entropy for each candidate + std::vector shifted_scores; + for (size_t i = 0; i < candidates->size; ++i) { + float shifted_score = fabsf(-logf(candidates->data[i].p) - entropy); + shifted_scores.push_back(shifted_score); + } + + // Sort tokens based on the shifted_scores and their corresponding indices + std::vector indices(candidates->size); + std::iota(indices.begin(), indices.end(), 0); + + std::sort(indices.begin(), indices.end(), [&](size_t a, size_t b) { + return shifted_scores[a] < shifted_scores[b]; + }); + + // Compute the cumulative probabilities + float cum_sum = 0.0f; + size_t last_idx = indices.size(); + + for (size_t i = 0; i < indices.size(); ++i) { + size_t idx = indices[i]; + cum_sum += candidates->data[idx].p; + + // Check if the running sum is greater than typical or if we have kept at + // least min_keep tokens + if (cum_sum > p && i >= min_keep - 1) { + last_idx = i + 1; + break; + } + } + + // Resize the output vector to keep only the locally typical tokens + std::vector new_candidates; + for (size_t i = 0; i < last_idx; ++i) { + size_t idx = indices[i]; + new_candidates.push_back(candidates->data[idx]); + } + + // Replace the data in candidates with the new_candidates data + std::copy(new_candidates.begin(), new_candidates.end(), candidates->data); + candidates->size = new_candidates.size(); + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void llama_sample_temperature(struct llama_context *ctx, + llama_token_data_array *candidates_p, + float temp) { + const int64_t t_start_sample_us = ggml_time_us(); + + for (size_t i = 0; i < candidates_p->size; ++i) { + candidates_p->data[i].logit /= temp; + } + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void llama_sample_repetition_penalty(struct llama_context *ctx, + llama_token_data_array *candidates, + const llama_token *last_tokens, + size_t last_tokens_size, float penalty) { + if (last_tokens_size == 0 || penalty == 1.0f) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + for (size_t i = 0; i < candidates->size; ++i) { + const auto *token_iter = std::find( + last_tokens, last_tokens + last_tokens_size, candidates->data[i].id); + if (token_iter == last_tokens + last_tokens_size) { + continue; + } + + // The academic publication that described this technique actually just only + // divided, but that would cause tokens with negative logits to become more + // likely, which is obviously wrong. This is common fix for this problem, + // which is to multiply by the penalty instead of dividing. + if (candidates->data[i].logit <= 0) { + candidates->data[i].logit *= penalty; + } else { + candidates->data[i].logit /= penalty; + } + } + + candidates->sorted = false; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +void llama_sample_frequency_and_presence_penalties( + struct llama_context *ctx, llama_token_data_array *candidates, + const llama_token *last_tokens_p, size_t last_tokens_size, + float alpha_frequency, float alpha_presence) { + if (last_tokens_size == 0 || + (alpha_frequency == 0.0f && alpha_presence == 0.0f)) { + return; + } + + const int64_t t_start_sample_us = ggml_time_us(); + + // Create a frequency map to count occurrences of each token in last_tokens + std::unordered_map token_count; + for (size_t i = 0; i < last_tokens_size; ++i) { + token_count[last_tokens_p[i]]++; + } + + // Apply frequency and presence penalties to the candidates + for (size_t i = 0; i < candidates->size; ++i) { + auto token_iter = token_count.find(candidates->data[i].id); + if (token_iter == token_count.end()) { + continue; + } + + int count = token_iter->second; + candidates->data[i].logit -= + float(count) * alpha_frequency + float(count > 0) * alpha_presence; + } + + candidates->sorted = false; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } +} + +llama_token llama_sample_token_mirostat(struct llama_context *ctx, + llama_token_data_array *candidates, + float tau, float eta, int m, + float *mu) { + assert(ctx); + auto N = float(llama_n_vocab(ctx)); + int64_t t_start_sample_us; + t_start_sample_us = ggml_time_us(); + + llama_sample_softmax(nullptr, candidates); + + // Estimate s_hat using the most probable m tokens + float s_hat = 0.0; + float sum_ti_bi = 0.0; + float sum_ti_sq = 0.0; + for (size_t i = 0; i < size_t(m - 1) && i < candidates->size - 1; ++i) { + float t_i = logf(float(i + 2) / float(i + 1)); + float b_i = logf(candidates->data[i].p / candidates->data[i + 1].p); + sum_ti_bi += t_i * b_i; + sum_ti_sq += t_i * t_i; + } + s_hat = sum_ti_bi / sum_ti_sq; + + // Compute k from the estimated s_hat and target surprise value + float epsilon_hat = s_hat - 1; + float k = powf((epsilon_hat * powf(2, *mu)) / (1 - powf(N, -epsilon_hat)), + 1 / s_hat); + + // Sample the next word X using top-k sampling + llama_sample_top_k(nullptr, candidates, int(k), 1); + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } + llama_token X = llama_sample_token(ctx, candidates); + t_start_sample_us = ggml_time_us(); + + // Compute error as the difference between observed surprise and target + // surprise value + size_t X_idx = std::distance( + candidates->data, + std::find_if(candidates->data, candidates->data + candidates->size, + [&](const llama_token_data &candidate) { + return candidate.id == X; + })); + float observed_surprise = -log2f(candidates->data[X_idx].p); + float e = observed_surprise - tau; + + // Update mu using the learning rate and error + *mu = *mu - eta * e; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + ctx->n_sample++; + } + return X; +} + +llama_token llama_sample_token_mirostat_v2(struct llama_context *ctx, + llama_token_data_array *candidates, + float tau, float eta, float *mu) { + assert(ctx); + int64_t t_start_sample_us; + t_start_sample_us = ggml_time_us(); + + llama_sample_softmax(ctx, candidates); + + // Truncate the words with surprise values greater than mu + candidates->size = std::distance( + candidates->data, + std::find_if(candidates->data, candidates->data + candidates->size, + [&](const llama_token_data &candidate) { + return -log2f(candidate.p) > *mu; + })); + + if (candidates->size == 0) { + candidates->size = 1; + } + + // Normalize the probabilities of the remaining words + llama_sample_softmax(ctx, candidates); + + // Sample the next word X from the remaining words + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } + llama_token X = llama_sample_token(ctx, candidates); + t_start_sample_us = ggml_time_us(); + + // Compute error as the difference between observed surprise and target + // surprise value + size_t X_idx = std::distance( + candidates->data, + std::find_if(candidates->data, candidates->data + candidates->size, + [&](const llama_token_data &candidate) { + return candidate.id == X; + })); + float observed_surprise = -log2f(candidates->data[X_idx].p); + float e = observed_surprise - tau; + + // Update mu using the learning rate and error + *mu = *mu - eta * e; + + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + } + return X; +} + +llama_token llama_sample_token_greedy(struct llama_context *ctx, + llama_token_data_array *candidates) { + const int64_t t_start_sample_us = ggml_time_us(); + + // Find max element + auto *max_iter = std::max_element( + candidates->data, candidates->data + candidates->size, + [](const llama_token_data &a, const llama_token_data &b) { + return a.logit < b.logit; + }); + + llama_token result = max_iter->id; + if (ctx) { + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + ctx->n_sample++; + } + return result; +} + +llama_token llama_sample_token(struct llama_context *ctx, + llama_token_data_array *candidates) { + assert(ctx); + const int64_t t_start_sample_us = ggml_time_us(); + llama_sample_softmax(nullptr, candidates); + + std::vector probs; + probs.reserve(candidates->size); + for (size_t i = 0; i < candidates->size; ++i) { + probs.push_back(candidates->data[i].p); + } + + std::discrete_distribution<> dist(probs.begin(), probs.end()); + auto &rng = ctx->rng; + int idx = dist(rng); + + llama_token result = candidates->data[idx].id; + + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + ctx->n_sample++; + return result; +} + +// +// quantization +// + +static void llama_convert_tensor_internal(const llama_load_tensor &tensor, + llama_buffer &output, + const int nelements, + const int nthread) { + if (output.size < nelements * sizeof(float)) { + output.resize(nelements * sizeof(float)); + } + float *f32_output = (float *)output.addr; + + quantize_fns_t qtype; + if (ggml_is_quantized(tensor.type)) { + qtype = ggml_internal_get_quantize_fn(tensor.type); + if (qtype.dequantize_row_q == NULL) { + throw std::runtime_error( + format("type %s unsupported for integer quantization: no " + "dequantization available", + ggml_type_name(tensor.type))); + } + } else if (tensor.type != GGML_TYPE_F16) { + throw std::runtime_error(format("cannot dequantize/convert tensor type %s", + ggml_type_name(tensor.type))); + } + + if (nthread < 2) { + if (tensor.type == GGML_TYPE_F16) { + ggml_fp16_to_fp32_row((ggml_fp16_t *)tensor.data, f32_output, nelements); + } else if (ggml_is_quantized(tensor.type)) { + qtype.dequantize_row_q(tensor.data, f32_output, nelements); + } else { + LLAMA_ASSERT(false); // unreachable + } + return; + } + + auto block_size = + tensor.type == GGML_TYPE_F16 ? 1 : (size_t)ggml_blck_size(tensor.type); + auto block_size_bytes = ggml_type_size(tensor.type); + + LLAMA_ASSERT(nelements % block_size == 0); + auto nblocks = nelements / block_size; + auto blocks_per_thread = nblocks / nthread; + auto spare_blocks = + nblocks - (blocks_per_thread * + nthread); // if blocks aren't divisible by thread count + + std::vector workers; + for (auto tnum = 0, in_buff_offs = 0, out_buff_offs = 0; tnum < nthread; + tnum++) { + auto thr_blocks = + blocks_per_thread + + (tnum == nthread - 1 ? spare_blocks : 0); // num blocks for this thread + auto thr_elems = + thr_blocks * block_size; // number of elements for this thread + auto thr_block_bytes = + thr_blocks * block_size_bytes; // number of input bytes for this thread + + auto compute = [qtype](ggml_type typ, uint8_t *inbuf, float *outbuf, + int nels) { + if (typ == GGML_TYPE_F16) { + ggml_fp16_to_fp32_row((ggml_fp16_t *)inbuf, outbuf, nels); + } else { + qtype.dequantize_row_q(inbuf, outbuf, nels); + } + }; + workers.push_back(std::thread(compute, tensor.type, + tensor.data + in_buff_offs, + f32_output + out_buff_offs, thr_elems)); + in_buff_offs += thr_block_bytes; + out_buff_offs += thr_elems; + } + for (auto &worker : workers) { + worker.join(); + } +} + +static void llama_model_quantize_internal( + const std::string &fname_inp, const std::string &fname_out, + const llama_model_quantize_params *params) { + ggml_type quantized_type; + llama_ftype ftype = params->ftype; + int nthread = params->nthread; + + switch (params->ftype) { + case LLAMA_FTYPE_MOSTLY_Q4_0: + quantized_type = GGML_TYPE_Q4_0; + break; + case LLAMA_FTYPE_MOSTLY_Q4_1: + quantized_type = GGML_TYPE_Q4_1; + break; + case LLAMA_FTYPE_MOSTLY_Q5_0: + quantized_type = GGML_TYPE_Q5_0; + break; + case LLAMA_FTYPE_MOSTLY_Q5_1: + quantized_type = GGML_TYPE_Q5_1; + break; + case LLAMA_FTYPE_MOSTLY_Q8_0: + quantized_type = GGML_TYPE_Q8_0; + break; + case LLAMA_FTYPE_MOSTLY_F16: + quantized_type = GGML_TYPE_F16; + break; + case LLAMA_FTYPE_ALL_F32: + quantized_type = GGML_TYPE_F32; + break; + +#ifdef GGML_USE_K_QUANTS + // K-quants + case LLAMA_FTYPE_MOSTLY_Q2_K: + quantized_type = GGML_TYPE_Q2_K; + break; + case LLAMA_FTYPE_MOSTLY_Q3_K_S: + case LLAMA_FTYPE_MOSTLY_Q3_K_M: + case LLAMA_FTYPE_MOSTLY_Q3_K_L: + quantized_type = GGML_TYPE_Q3_K; + break; + case LLAMA_FTYPE_MOSTLY_Q4_K_S: + case LLAMA_FTYPE_MOSTLY_Q4_K_M: + quantized_type = GGML_TYPE_Q4_K; + break; + case LLAMA_FTYPE_MOSTLY_Q5_K_S: + case LLAMA_FTYPE_MOSTLY_Q5_K_M: + quantized_type = GGML_TYPE_Q5_K; + break; + case LLAMA_FTYPE_MOSTLY_Q6_K: + quantized_type = GGML_TYPE_Q6_K; + break; +#endif + default: + throw std::runtime_error(format("invalid output file type %d\n", ftype)); + } + + if (nthread <= 0) { + nthread = std::thread::hardware_concurrency(); + } + + std::unique_ptr model_loader( + new llama_model_loader(fname_inp, /*use_mmap*/ false, + /*vocab_only*/ false)); + llama_file_saver file_saver( + fname_out.c_str(), model_loader->file_loaders.at(0).get(), params->ftype); + +#ifdef GGML_USE_K_QUANTS + int n_attention_wv = 0; + int n_feed_forward_w2 = 0; + for (auto &tensor : model_loader->tensors_map.tensors) { + if (tensor.name.find("attention.wv.weight") != std::string::npos) { + ++n_attention_wv; + } else if (tensor.name.find("feed_forward.w2.weight") != + std::string::npos) { + ++n_feed_forward_w2; + } + } + + int i_attention_wv = 0; + int i_feed_forward_w2 = 0; +#endif + + size_t total_size_org = 0; + size_t total_size_new = 0; + std::vector hist_all(1 << 4, 0); + + std::vector workers; + std::mutex mutex; + + size_t idx = 0; + for (llama_load_tensor &tensor : model_loader->tensors_map.tensors) { + llama_buffer read_data; + read_data.resize(tensor.size); + tensor.data = read_data.addr; + model_loader->load_data_for(tensor); + + printf("[%4zu/%4zu] %36s - %16s, type = %6s, ", ++idx, + model_loader->tensors_map.tensors.size(), tensor.name.c_str(), + llama_format_tensor_shape(tensor.ne).c_str(), + ggml_type_name(tensor.type)); + + // This used to be a regex, but has an extreme cost to compile + // times. + bool quantize = tensor.name.rfind("weight") == + tensor.name.size() - 6; // ends with 'weight'? + + // quantize only 2D tensors + quantize &= (tensor.ne.size() == 2); + quantize &= + params->quantize_output_tensor || tensor.name != "output.weight"; + quantize &= quantized_type != tensor.type; + + enum ggml_type new_type; + void *new_data; + size_t new_size; + llama_buffer work; + + if (!quantize) { + new_type = tensor.type; + new_data = tensor.data; + new_size = tensor.size; + printf("size = %8.3f MB\n", tensor.size / 1024.0 / 1024.0); + } else { + new_type = quantized_type; +#ifdef GGML_USE_K_QUANTS + if (quantized_type == GGML_TYPE_Q2_K || + quantized_type == GGML_TYPE_Q3_K || + quantized_type == GGML_TYPE_Q4_K || + quantized_type == GGML_TYPE_Q5_K || + quantized_type == GGML_TYPE_Q6_K) { + int nx = tensor.ne.at(0); + int ny = tensor.ne.at(0); + if (nx % QK_K != 0 || ny % QK_K != 0) { + fprintf(stderr, + "\n\n========================= Tensor sizes %d x %d are not " + "divisible by %d\n", + nx, ny, QK_K); + fprintf(stderr, + "This is required to be able to use k-quants for now!\n"); + fprintf(stderr, + "============================================================" + "============================\n\n"); + throw std::runtime_error("Unsupported tensor size encountered\n"); + } + } + if (tensor.name == "output.weight") { + new_type = GGML_TYPE_Q6_K; + } else if (tensor.name.find("attention.wv.weight") != std::string::npos) { + if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || + ftype == LLAMA_FTYPE_MOSTLY_Q2_K) + new_type = GGML_TYPE_Q4_K; + else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) + new_type = GGML_TYPE_Q5_K; + else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || + ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) && + (i_attention_wv < n_attention_wv / 8 || + i_attention_wv >= 7 * n_attention_wv / 8 || + (i_attention_wv - n_attention_wv / 8) % 3 == 2)) + new_type = GGML_TYPE_Q6_K; + ++i_attention_wv; + } else if (tensor.name.find("feed_forward.w2.weight") != + std::string::npos) { + if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || + ftype == LLAMA_FTYPE_MOSTLY_Q2_K) + new_type = GGML_TYPE_Q4_K; + else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) + new_type = GGML_TYPE_Q5_K; + else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || + ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) && + (i_feed_forward_w2 < n_feed_forward_w2 / 8 || + i_feed_forward_w2 >= 7 * n_feed_forward_w2 / 8 || + (i_feed_forward_w2 - n_feed_forward_w2 / 8) % 3 == 2)) + new_type = GGML_TYPE_Q6_K; + ++i_feed_forward_w2; + } else if (tensor.name.find("attention.wo.weight") != std::string::npos) { + if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || + ftype == LLAMA_FTYPE_MOSTLY_Q2_K) + new_type = GGML_TYPE_Q4_K; + else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) + new_type = GGML_TYPE_Q5_K; + } +#endif + + float *f32_data; + size_t nelements = tensor.ne.at(0) * tensor.ne.at(1); + llama_buffer f32_conv_buf; + + if (tensor.type == GGML_TYPE_F32) { + f32_data = (float *)tensor.data; + } else if (ggml_is_quantized(tensor.type) && !params->allow_requantize) { + throw std::runtime_error(format("requantizing from type %s is disabled", + ggml_type_name(tensor.type))); + } else { + llama_convert_tensor_internal(tensor, f32_conv_buf, nelements, nthread); + f32_data = (float *)f32_conv_buf.addr; + } + + printf("quantizing .. "); + fflush(stdout); + + work.resize(nelements * 4); // upper bound on size + new_data = work.addr; + std::vector hist_cur(1 << 4, 0); + + int chunk_size = 32 * 512; + const int nchunk = (nelements + chunk_size - 1) / chunk_size; + const int nthread_use = + nthread > 1 ? std::max(1, std::min(nthread, nchunk)) : 1; + if (nthread_use < 2) { + new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, + nelements, hist_cur.data()); + } else { + size_t counter = 0; + new_size = 0; + auto compute = [&mutex, &counter, &hist_cur, &new_size, new_type, + f32_data, new_data, nelements, chunk_size]() { + std::vector local_hist; + size_t local_size = 0; + while (true) { + std::unique_lock lock(mutex); + size_t first = counter; + counter += chunk_size; + if (first >= nelements) { + if (!local_hist.empty()) { + for (int j = 0; j < int(local_hist.size()); ++j) { + hist_cur[j] += local_hist[j]; + } + new_size += local_size; + } + break; + } + lock.unlock(); + size_t last = std::min(nelements, first + chunk_size); + if (local_hist.empty()) { + local_hist.resize(hist_cur.size(), 0); + } + local_size += + ggml_quantize_chunk(new_type, f32_data, new_data, first, + last - first, local_hist.data()); + } + }; + if ((int)workers.size() < nthread_use - 1) { + workers.resize(nthread_use - 1); + } + for (int it = 0; it < nthread_use - 1; ++it) { + workers[it] = std::thread(compute); + } + compute(); + for (int it = 0; it < nthread_use - 1; ++it) { + workers[it].join(); + } + } + + printf("size = %8.2f MB -> %8.2f MB | hist: ", + tensor.size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0); + int64_t tot_count = 0; + for (size_t i = 0; i < hist_cur.size(); i++) { + hist_all[i] += hist_cur[i]; + tot_count += hist_cur[i]; + } + + if (tot_count > 0) { + for (size_t i = 0; i < hist_cur.size(); i++) { + printf("%5.3f ", hist_cur[i] / float(nelements)); + } + } + printf("\n"); + } + total_size_org += tensor.size; + total_size_new += new_size; + file_saver.write_tensor(tensor, new_type, new_data, new_size); + } + + printf("%s: model size = %8.2f MB\n", __func__, + total_size_org / 1024.0 / 1024.0); + printf("%s: quant size = %8.2f MB\n", __func__, + total_size_new / 1024.0 / 1024.0); + + { + int64_t sum_all = 0; + for (size_t i = 0; i < hist_all.size(); i++) { + sum_all += hist_all[i]; + } + + if (sum_all > 0) { + printf("%s: hist: ", __func__); + for (size_t i = 0; i < hist_all.size(); i++) { + printf("%5.3f ", hist_all[i] / float(sum_all)); + } + printf("\n"); + } + } +} + +// +// interface implementation +// + +struct llama_context *llama_init_from_file(const char *path_model, + struct llama_context_params params) { + ggml_time_init(); + + llama_context *ctx = new llama_context; + + if (params.seed < 0) { + params.seed = time(NULL); + } + + unsigned cur_percentage = 0; + if (params.progress_callback == NULL) { + params.progress_callback_user_data = &cur_percentage; + params.progress_callback = [](float progress, void *ctx) { + unsigned *cur_percentage_p = (unsigned *)ctx; + unsigned percentage = (unsigned)(100 * progress); + while (percentage > *cur_percentage_p) { + *cur_percentage_p = percentage; + } + }; + } + + ctx->rng = std::mt19937(params.seed); + ctx->logits_all = params.logits_all; + + ggml_type memory_type = params.f16_kv ? GGML_TYPE_F16 : GGML_TYPE_F32; + + if (!llama_model_load( + path_model, *ctx, params.n_ctx, params.n_batch, params.n_gpu_layers, + params.main_gpu, params.tensor_split, params.low_vram, memory_type, + params.use_mmap, params.use_mlock, params.vocab_only, + params.progress_callback, params.progress_callback_user_data)) { + fprintf(stderr, "%s: failed to load model\n", __func__); + llama_free(ctx); + return nullptr; + } + + // reserve memory for context buffers + if (!params.vocab_only) { + if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, + ctx->model.hparams.n_ctx, params.n_gpu_layers)) { + fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", + __func__); + llama_free(ctx); + return nullptr; + } + + const auto &hparams = ctx->model.hparams; + + // resized during inference + if (params.logits_all) { + ctx->logits.reserve(hparams.n_ctx * hparams.n_vocab); + } else { + ctx->logits.reserve(hparams.n_vocab); + } + + if (params.embedding) { + ctx->embedding.resize(hparams.n_embd); + } + + ctx->buf_compute.resize(MEM_REQ_EVAL().at(ctx->model.type)); + + ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0().at(ctx->model.type)); + ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1().at(ctx->model.type)); + } + +#ifdef GGML_USE_METAL + if (params.n_gpu_layers > 0) { + // this allocates all Metal resources and memory buffers + ctx->ctx_metal = ggml_metal_init(); + + void *data_ptr = NULL; + size_t data_size = 0; + + if (params.use_mmap) { + data_ptr = ctx->model.mapping->addr; + data_size = ctx->model.mapping->size; + } else { + data_ptr = ggml_get_mem_buffer(ctx->model.ctx); + data_size = ggml_get_mem_size(ctx->model.ctx); + } + + const size_t max_size = ggml_get_max_tensor_size(ctx->model.ctx); + + printf("%s: max tensor size = %8.2f MB\n", __func__, + max_size / 1024.0 / 1024.0); + +#define LLAMA_METAL_CHECK_BUF(result) \ + if (!(result)) { \ + fprintf(stderr, "%s: failed to add buffer\n", __func__); \ + llama_free(ctx); \ + return NULL; \ + } + + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "data", + data_ptr, data_size, max_size)); + + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "eval", + ctx->buf_compute.addr, + ctx->buf_compute.size, 0)); + LLAMA_METAL_CHECK_BUF( + ggml_metal_add_buffer(ctx->ctx_metal, "kv", ctx->model.kv_self.buf.addr, + ctx->model.kv_self.buf.size, 0)); + + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr0", + ctx->buf_scratch[0].addr, + ctx->buf_scratch[0].size, 0)); + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr1", + ctx->buf_scratch[1].addr, + ctx->buf_scratch[1].size, 0)); +#undef LLAMA_METAL_CHECK_BUF + } +#endif + + return ctx; +} + +void llama_free(struct llama_context *ctx) { delete ctx; } + +int llama_model_quantize(const char *fname_inp, const char *fname_out, + const llama_model_quantize_params *params) { + try { + llama_model_quantize_internal(fname_inp, fname_out, params); + return 0; + } catch (const std::exception &err) { + fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.what()); + return 1; + } +} + +int llama_apply_lora_from_file_internal(struct llama_context *ctx, + const char *path_lora, + const char *path_base_model, + int n_threads) { + fprintf(stderr, "%s: applying lora adapter from '%s' - please wait ...\n", + __func__, path_lora); + + auto &model = ctx->model; + + const int64_t t_start_lora_us = ggml_time_us(); + + auto fin = std::ifstream(path_lora, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, path_lora); + return 1; + } + + // verify magic and version + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != LLAMA_FILE_MAGIC_GGLA) { + fprintf(stderr, "%s: bad file magic\n", __func__); + return 1; + } + uint32_t format_version; + fin.read((char *)&format_version, sizeof(format_version)); + + if (format_version != 1) { + fprintf(stderr, "%s: unsupported file version\n", __func__); + return 1; + } + } + + int32_t lora_r; + int32_t lora_alpha; + fin.read((char *)&lora_r, sizeof(lora_r)); + fin.read((char *)&lora_alpha, sizeof(lora_alpha)); + float scaling = (float)lora_alpha / (float)lora_r; + + fprintf(stderr, "%s: r = %d, alpha = %d, scaling = %.2f\n", __func__, lora_r, + lora_alpha, scaling); + + // create a temporary ggml context to store the lora tensors + // todo: calculate size from biggest possible tensor + std::vector lora_buf(1024ull * 1024ull * 1024ull); + struct ggml_init_params params; + params.mem_size = lora_buf.size(); + params.mem_buffer = lora_buf.data(); + params.no_alloc = false; + + ggml_context *lora_ctx = ggml_init(params); + std::unordered_map lora_tensors; + + // create a name -> tensor map of the model to accelerate lookups + std::unordered_map model_tensors; + for (auto &kv : model.tensors_by_name) { + model_tensors.insert(kv); + } + + // load base model + std::unique_ptr model_loader; + ggml_context *base_ctx = NULL; + llama_buffer base_buf; + if (path_base_model) { + fprintf(stderr, "%s: loading base model from '%s'\n", __func__, + path_base_model); + model_loader.reset(new llama_model_loader( + path_base_model, /*use_mmap*/ true, /*vocab_only*/ false)); + + size_t ctx_size; + size_t mmapped_size; + model_loader->calc_sizes(&ctx_size, &mmapped_size); + base_buf.resize(ctx_size); + + ggml_init_params base_params; + base_params.mem_size = base_buf.size; + base_params.mem_buffer = base_buf.addr; + base_params.no_alloc = model_loader->use_mmap; + + base_ctx = ggml_init(base_params); + + model_loader->ggml_ctx = base_ctx; + + // maybe this should in llama_model_loader + if (model_loader->use_mmap) { + model_loader->mapping.reset(new llama_mmap( + &model_loader->file_loaders.at(0)->file, /* prefetch */ 0)); + } + } + + // read tensors and apply + bool warned = false; + int n_tensors = 0; + while (true) { + int32_t n_dims; + int32_t length; + int32_t ftype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ftype), sizeof(ftype)); + if (fin.eof()) { + break; + } + + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + } + + std::string name; + { + char buf[1024]; + fin.read(buf, length); + name = std::string(buf, length); + } + + // check for lora suffix and get the type of tensor + const std::string lora_suffix = ".lora"; + size_t pos = name.rfind(lora_suffix); + if (pos == std::string::npos) { + fprintf(stderr, "%s: error: '%s' is not a lora tensor\n", __func__, + name.c_str()); + return 1; + } + + std::string lora_type = name.substr(pos + lora_suffix.length()); + std::string base_name = name; + base_name.erase(pos); + // fprintf(stderr, "%s: %s => %s (lora type %s) ", __func__, + // name.c_str(),base_name.c_str(), lora_type.c_str()); + + if (model_tensors.find(base_name) == model_tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in lora adapter\n", __func__, + name.data()); + return 1; + } + + // create ggml tensor + ggml_type wtype; + switch (ftype) { + case 0: + wtype = GGML_TYPE_F32; + break; + case 1: + wtype = GGML_TYPE_F16; + break; + default: { + fprintf(stderr, "%s: invalid tensor data type '%d'\n", __func__, ftype); + return false; + } + } + ggml_tensor *lora_tensor; + if (n_dims == 2) { + lora_tensor = ggml_new_tensor_2d(lora_ctx, wtype, ne[0], ne[1]); + } else { + fprintf(stderr, "%s: unsupported tensor dimension %d\n", __func__, + n_dims); + return 1; + } + + // load tensor data + size_t offset = fin.tellg(); + size_t tensor_data_size = ggml_nbytes(lora_tensor); + offset = (offset + 31) & -32; + fin.seekg(offset); + fin.read((char *)lora_tensor->data, tensor_data_size); + + lora_tensors[name] = lora_tensor; + + // check if we have both A and B tensors and apply + if (lora_tensors.find(base_name + ".loraA") != lora_tensors.end() && + lora_tensors.find(base_name + ".loraB") != lora_tensors.end()) { + ggml_tensor *dest_t = model_tensors[base_name]; + ggml_tensor *base_t; + if (model_loader) { + // load from base model + if (model_loader->tensors_map.name_to_idx.find(base_name) == + model_loader->tensors_map.name_to_idx.end()) { + fprintf(stderr, "%s: error: tensor '%s' not found in base model\n", + __func__, base_name.c_str()); + return 1; + } + size_t idx = model_loader->tensors_map.name_to_idx[base_name]; + llama_load_tensor < = model_loader->tensors_map.tensors[idx]; + base_t = model_loader->get_tensor( + base_name, {(uint32_t)dest_t->ne[0], (uint32_t)dest_t->ne[1]}, + GGML_BACKEND_CPU); + lt.data = (uint8_t *)lt.ggml_tensor->data; + model_loader->load_data_for(lt); + lt.ggml_tensor->data = lt.data; + } else { + base_t = dest_t; + } + + if (ggml_is_quantized(base_t->type)) { + if (!warned) { + fprintf(stderr, + "%s: warning: using a lora adapter with a quantized model " + "may result in poor quality, " + "use a f16 or f32 base model with --lora-base\n", + __func__); + warned = true; + } + } + + ggml_tensor *loraA = lora_tensors[base_name + ".loraA"]; + ggml_tensor *loraB = lora_tensors[base_name + ".loraB"]; + + if (base_t->ne[0] != loraA->ne[1] || base_t->ne[1] != loraB->ne[1]) { + fprintf(stderr, + "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 + ");" + " are you sure that this adapter is for this model?\n", + __func__, base_t->ne[0], loraA->ne[1]); + return 1; + } + + // w = w + BA*s + ggml_tensor *BA = ggml_mul_mat(lora_ctx, loraA, loraB); + + if (scaling != 1.0f) { + ggml_tensor *scale_tensor = ggml_new_f32(lora_ctx, scaling); + BA = ggml_scale_inplace(lora_ctx, BA, scale_tensor); + } + + ggml_tensor *r; + if (base_t == dest_t) { + r = ggml_add_inplace(lora_ctx, dest_t, BA); + } else { + r = ggml_add(lora_ctx, base_t, BA); + r = ggml_cpy(lora_ctx, r, dest_t); + } + + struct ggml_cgraph gf = ggml_build_forward(r); + gf.n_threads = n_threads; + ggml_graph_compute(lora_ctx, &gf); + + // we won't need these tensors again, reset the context to save memory + ggml_free(lora_ctx); + lora_ctx = ggml_init(params); + lora_tensors.clear(); + + n_tensors++; + if (n_tensors % 4 == 0) { + fprintf(stderr, "."); + } + } + } + + // TODO: this should be in a destructor, it will leak on failure + ggml_free(lora_ctx); + if (base_ctx) { + ggml_free(base_ctx); + } + + const int64_t t_lora_us = ggml_time_us() - t_start_lora_us; + fprintf(stderr, " done (%.2f ms)\n", t_lora_us / 1000.0); + + return 0; +} + +int llama_apply_lora_from_file(struct llama_context *ctx, const char *path_lora, + const char *path_base_model, int n_threads) { + try { + return llama_apply_lora_from_file_internal(ctx, path_lora, path_base_model, + n_threads); + } catch (const std::exception &err) { + fprintf(stderr, "%s: failed to apply lora adapter: %s\n", __func__, + err.what()); + return 1; + } +} + +int llama_get_kv_cache_token_count(const struct llama_context *ctx) { + return ctx->model.kv_self.n; +} + +#define LLAMA_MAX_RNG_STATE (64 * 1024) + +void llama_set_rng_seed(struct llama_context *ctx, int seed) { + if (seed < 0) { + seed = time(NULL); + } + ctx->rng.seed(seed); +} + +// Returns the *maximum* size of the state +size_t llama_get_state_size(const struct llama_context *ctx) { + // we don't know size of rng until we actually serialize it. so reserve more + // than enough memory for its serialized state. for reference, + // std::mt19937(1337) serializes to 6701 bytes. + const size_t s_rng_size = sizeof(size_t); + const size_t s_rng = LLAMA_MAX_RNG_STATE; + const size_t s_logits_capacity = sizeof(size_t); + const size_t s_logits_size = sizeof(size_t); + const size_t s_logits = ctx->logits.capacity() * sizeof(float); + const size_t s_embedding_size = sizeof(size_t); + const size_t s_embedding = ctx->embedding.size() * sizeof(float); + const size_t s_kv_size = sizeof(size_t); + const size_t s_kv_ntok = sizeof(int); + const size_t s_kv = ctx->model.kv_self.buf.size; + + const size_t s_total = + (+s_rng_size + s_rng + s_logits_capacity + s_logits_size + s_logits + + s_embedding_size + s_embedding + s_kv_size + s_kv_ntok + s_kv); + + return s_total; +} + +// Copies the state to the specified destination address +size_t llama_copy_state_data(struct llama_context *ctx, uint8_t *dst) { + uint8_t *out = dst; + + // copy rng + { + std::stringstream rng_ss; + rng_ss << ctx->rng; + + const size_t rng_size = rng_ss.str().size(); + char rng_buf[LLAMA_MAX_RNG_STATE]; + + memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE); + memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size()); + + memcpy(out, &rng_size, sizeof(rng_size)); + out += sizeof(rng_size); + memcpy(out, &rng_buf[0], LLAMA_MAX_RNG_STATE); + out += LLAMA_MAX_RNG_STATE; + } + + // copy logits + { + const size_t logits_cap = ctx->logits.capacity(); + const size_t logits_size = ctx->logits.size(); + + memcpy(out, &logits_cap, sizeof(logits_cap)); + out += sizeof(logits_cap); + memcpy(out, &logits_size, sizeof(logits_size)); + out += sizeof(logits_size); + + if (logits_size) { + memcpy(out, ctx->logits.data(), logits_size * sizeof(float)); + } + + out += logits_cap * sizeof(float); + } + + // copy embeddings + { + const size_t embedding_size = ctx->embedding.size(); + + memcpy(out, &embedding_size, sizeof(embedding_size)); + out += sizeof(embedding_size); + + if (embedding_size) { + memcpy(out, ctx->embedding.data(), embedding_size * sizeof(float)); + out += embedding_size * sizeof(float); + } + } + + // copy kv cache + { + const auto &kv_self = ctx->model.kv_self; + const auto &hparams = ctx->model.hparams; + const int n_layer = hparams.n_layer; + const int n_embd = hparams.n_embd; + const int n_ctx = hparams.n_ctx; + + const size_t kv_size = kv_self.buf.size; + const int kv_ntok = llama_get_kv_cache_token_count(ctx); + + memcpy(out, &kv_size, sizeof(kv_size)); + out += sizeof(kv_size); + memcpy(out, &kv_ntok, sizeof(kv_ntok)); + out += sizeof(kv_ntok); + + if (kv_size) { + const size_t elt_size = ggml_element_size(kv_self.k); + + char buffer[4096]; + + ggml_context *cpy_ctx = + ggml_init({sizeof(buffer), buffer, /* no_alloc */ true}); + ggml_cgraph gf{}; + gf.n_threads = 1; + + ggml_tensor *kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, + kv_ntok, n_layer); + kout3d->data = out; + out += ggml_nbytes(kout3d); + + ggml_tensor *vout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, + kv_ntok, n_embd, n_layer); + vout3d->data = out; + out += ggml_nbytes(vout3d); + + ggml_tensor *k3d = + ggml_view_3d(cpy_ctx, kv_self.k, n_embd, kv_ntok, n_layer, + elt_size * n_embd, elt_size * n_embd * n_ctx, 0); + + ggml_tensor *v3d = + ggml_view_3d(cpy_ctx, kv_self.v, kv_ntok, n_embd, n_layer, + elt_size * n_ctx, elt_size * n_ctx * n_embd, 0); + + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d)); + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d)); + ggml_graph_compute(cpy_ctx, &gf); + + ggml_free(cpy_ctx); + } + } + + const size_t written = out - dst; + const size_t max_size = llama_get_state_size(ctx); + + LLAMA_ASSERT(written <= max_size); + + return written; +} + +// Sets the state reading from the specified source address +size_t llama_set_state_data(struct llama_context *ctx, uint8_t *src) { + uint8_t *inp = src; + + // set rng + { + size_t rng_size; + char rng_buf[LLAMA_MAX_RNG_STATE]; + + memcpy(&rng_size, inp, sizeof(rng_size)); + inp += sizeof(rng_size); + memcpy(&rng_buf[0], inp, LLAMA_MAX_RNG_STATE); + inp += LLAMA_MAX_RNG_STATE; + + std::stringstream rng_ss; + rng_ss.str(std::string(&rng_buf[0], rng_size)); + rng_ss >> ctx->rng; + + LLAMA_ASSERT(rng_ss.fail() == false); + } + + // set logits + { + size_t logits_cap; + size_t logits_size; + + memcpy(&logits_cap, inp, sizeof(logits_cap)); + inp += sizeof(logits_cap); + memcpy(&logits_size, inp, sizeof(logits_size)); + inp += sizeof(logits_size); + + LLAMA_ASSERT(ctx->logits.capacity() == logits_cap); + + if (logits_size) { + ctx->logits.resize(logits_size); + memcpy(ctx->logits.data(), inp, logits_size * sizeof(float)); + } + + inp += logits_cap * sizeof(float); + } + + // set embeddings + { + size_t embedding_size; + + memcpy(&embedding_size, inp, sizeof(embedding_size)); + inp += sizeof(embedding_size); + + LLAMA_ASSERT(ctx->embedding.capacity() == embedding_size); + + if (embedding_size) { + memcpy(ctx->embedding.data(), inp, embedding_size * sizeof(float)); + inp += embedding_size * sizeof(float); + } + } + + // set kv cache + { + const auto &kv_self = ctx->model.kv_self; + const auto &hparams = ctx->model.hparams; + const int n_layer = hparams.n_layer; + const int n_embd = hparams.n_embd; + const int n_ctx = hparams.n_ctx; + + size_t kv_size; + int kv_ntok; + + memcpy(&kv_size, inp, sizeof(kv_size)); + inp += sizeof(kv_size); + memcpy(&kv_ntok, inp, sizeof(kv_ntok)); + inp += sizeof(kv_ntok); + + if (kv_size) { + LLAMA_ASSERT(kv_self.buf.size == kv_size); + + const size_t elt_size = ggml_element_size(kv_self.k); + + char buffer[4096]; + + ggml_context *cpy_ctx = + ggml_init({sizeof(buffer), buffer, /* no_alloc */ true}); + ggml_cgraph gf{}; + gf.n_threads = 1; + + ggml_tensor *kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, + kv_ntok, n_layer); + kin3d->data = (void *)inp; + inp += ggml_nbytes(kin3d); + + ggml_tensor *vin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, kv_ntok, + n_embd, n_layer); + vin3d->data = (void *)inp; + inp += ggml_nbytes(vin3d); + + ggml_tensor *k3d = + ggml_view_3d(cpy_ctx, kv_self.k, n_embd, kv_ntok, n_layer, + elt_size * n_embd, elt_size * n_embd * n_ctx, 0); + + ggml_tensor *v3d = + ggml_view_3d(cpy_ctx, kv_self.v, kv_ntok, n_embd, n_layer, + elt_size * n_ctx, elt_size * n_ctx * n_embd, 0); + + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d)); + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d)); + ggml_graph_compute(cpy_ctx, &gf); + + ggml_free(cpy_ctx); + } + + ctx->model.kv_self.n = kv_ntok; + } + + const size_t nread = inp - src; + const size_t max_size = llama_get_state_size(ctx); + + LLAMA_ASSERT(nread <= max_size); + + return nread; +} + +bool llama_load_session_file(struct llama_context *ctx, + const char *path_session, llama_token *tokens_out, + size_t n_token_capacity, + size_t *n_token_count_out) { + llama_file file(path_session, "rb"); + + // sanity checks + { + const uint32_t magic = file.read_u32(); + const uint32_t version = file.read_u32(); + + if (magic != LLAMA_SESSION_MAGIC || version != LLAMA_SESSION_VERSION) { + fprintf(stderr, + "%s : unknown (magic, version) for session file: %08x, %08x\n", + __func__, magic, version); + return false; + } + + llama_hparams session_hparams; + file.read_raw(&session_hparams, sizeof(llama_hparams)); + + if (session_hparams != ctx->model.hparams) { + fprintf(stderr, "%s : model hparams didn't match from session file!\n", + __func__); + return false; + } + } + + // load the prompt + { + const uint32_t n_token_count = file.read_u32(); + + if (n_token_count > n_token_capacity) { + fprintf(stderr, + "%s : token count in session file exceeded capacity! %u > %zu\n", + __func__, n_token_count, n_token_capacity); + return false; + } + + file.read_raw(tokens_out, sizeof(llama_token) * n_token_count); + *n_token_count_out = n_token_count; + } + + // restore the context state + { + const size_t n_state_size_cur = file.size - file.tell(); + const size_t n_state_size_max = llama_get_state_size(ctx); + + if (n_state_size_cur > n_state_size_max) { + fprintf( + stderr, + "%s : the state size in session file is too big! max %zu, got %zu\n", + __func__, n_state_size_max, n_state_size_cur); + return false; + } + + std::vector state_data(n_state_size_max); + file.read_raw(state_data.data(), n_state_size_cur); + + llama_set_state_data(ctx, state_data.data()); + } + + return true; +} + +bool llama_save_session_file(struct llama_context *ctx, + const char *path_session, + const llama_token *tokens, size_t n_token_count) { + llama_file file(path_session, "wb"); + + file.write_u32(LLAMA_SESSION_MAGIC); + file.write_u32(LLAMA_SESSION_VERSION); + + file.write_raw(&ctx->model.hparams, sizeof(llama_hparams)); + + // save the prompt + file.write_u32((uint32_t)n_token_count); + file.write_raw(tokens, sizeof(llama_token) * n_token_count); + + // save the context state + { + const size_t n_state_size_max = llama_get_state_size(ctx); + + std::vector state_data(n_state_size_max); + const size_t n_state_size_cur = + llama_copy_state_data(ctx, state_data.data()); + + file.write_raw(state_data.data(), n_state_size_cur); + } + + return true; +} + +int llama_eval(struct llama_context *ctx, const llama_token *tokens, + int n_tokens, int n_past, int n_threads) { + if (!llama_eval_internal(*ctx, tokens, n_tokens, n_past, n_threads, + nullptr)) { + fprintf(stderr, "%s: failed to eval\n", __func__); + return 1; + } + + // get a more accurate load time, upon first eval + // TODO: fix this + if (!ctx->has_evaluated_once) { + ctx->t_load_us = ggml_time_us() - ctx->t_start_us; + ctx->has_evaluated_once = true; + } + + return 0; +} + +int llama_eval_export(struct llama_context *ctx, const char *fname) { + const int n_batch = 1; + const int n_ctx = 512 - n_batch; + + const std::vector tmp(n_batch, llama_token_bos()); + + if (!llama_eval_internal(*ctx, tmp.data(), tmp.size(), n_ctx, 1, fname)) { + fprintf(stderr, "%s: failed to eval\n", __func__); + return 1; + } + + return 0; +} + +int llama_tokenize(struct llama_context *ctx, const char *text, + llama_token *tokens, int n_max_tokens, bool add_bos) { + auto res = llama_tokenize(ctx->vocab, text, add_bos); + + if (n_max_tokens < (int)res.size()) { + fprintf(stderr, "%s: too many tokens\n", __func__); + return -((int)res.size()); + } + + for (size_t i = 0; i < res.size(); i++) { + tokens[i] = res[i]; + } + + return res.size(); +} + +int llama_n_vocab(const struct llama_context *ctx) { + return ctx->vocab.id_to_token.size(); +} + +int llama_n_ctx(const struct llama_context *ctx) { + return ctx->model.hparams.n_ctx; +} + +int llama_n_embd(const struct llama_context *ctx) { + return ctx->model.hparams.n_embd; +} + +int llama_get_vocab(const struct llama_context *ctx, const char **strings, + float *scores, int capacity) { + int n = std::min(capacity, (int)ctx->vocab.id_to_token.size()); + for (int i = 0; i < n; ++i) { + strings[i] = ctx->vocab.id_to_token[i].tok.c_str(); + scores[i] = ctx->vocab.id_to_token[i].score; + } + return n; +} + +float *llama_get_logits(struct llama_context *ctx) { + return ctx->logits.data(); +} + +float *llama_get_embeddings(struct llama_context *ctx) { + return ctx->embedding.data(); +} + +const char *llama_token_to_str(const struct llama_context *ctx, + llama_token token) { + if (token >= llama_n_vocab(ctx)) { + return nullptr; + } + + return ctx->vocab.id_to_token[token].tok.c_str(); +} + +llama_token llama_token_bos() { return 1; } + +llama_token llama_token_eos() { return 2; } + +llama_token llama_token_nl() { return 13; } + +void llama_print_timings(struct llama_context *ctx) { + const int64_t t_end_us = ggml_time_us(); + + const int32_t n_sample = std::max(1, ctx->n_sample); + const int32_t n_eval = std::max(1, ctx->n_eval); + const int32_t n_p_eval = std::max(1, ctx->n_p_eval); + + fprintf(stderr, "\n"); + fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, + ctx->t_load_us / 1000.0); + fprintf(stderr, + "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, " + "%8.2f tokens per second)\n", + __func__, 1e-3 * ctx->t_sample_us, n_sample, + 1e-3 * ctx->t_sample_us / n_sample, + 1e6 / ctx->t_sample_us * n_sample); + fprintf(stderr, + "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, " + "%8.2f tokens per second)\n", + __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, + 1e-3 * ctx->t_p_eval_us / n_p_eval, + 1e6 / ctx->t_p_eval_us * n_p_eval); + fprintf(stderr, + "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, " + "%8.2f tokens per second)\n", + __func__, 1e-3 * ctx->t_eval_us, n_eval, + 1e-3 * ctx->t_eval_us / n_eval, 1e6 / ctx->t_eval_us * n_eval); + fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, + (t_end_us - ctx->t_start_us) / 1000.0); +} + +void llama_reset_timings(struct llama_context *ctx) { + ctx->t_start_us = ggml_time_us(); + ctx->t_sample_us = ctx->n_sample = 0; + ctx->t_eval_us = ctx->n_eval = 0; + ctx->t_p_eval_us = ctx->n_p_eval = 0; +} + +const char *llama_print_system_info(void) { + static std::string s; + + s = ""; + s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | "; + s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | "; + s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | "; + s += "AVX512_VBMI = " + std::to_string(ggml_cpu_has_avx512_vbmi()) + " | "; + s += "AVX512_VNNI = " + std::to_string(ggml_cpu_has_avx512_vnni()) + " | "; + s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | "; + s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | "; + s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | "; + s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | "; + s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | "; + s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | "; + s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | "; + s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | "; + s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | "; + + return s.c_str(); +} + +// For internal test use +std::vector> + &llama_internal_get_tensor_map(struct llama_context *ctx) { + return ctx->model.tensors_by_name; +} diff --git a/ctransformers/models/ggml/llama.h b/ctransformers/models/ggml/llama.h new file mode 100644 index 0000000000000000000000000000000000000000..1241ba6c0ec443dbd1ba7d3ca5f6102184c82499 --- /dev/null +++ b/ctransformers/models/ggml/llama.h @@ -0,0 +1,318 @@ +#ifndef LLAMA_H +#define LLAMA_H + +#include "ggml.h" +#ifdef GGML_USE_CUBLAS +#include "ggml-cuda.h" +#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES +#else +#define LLAMA_MAX_DEVICES 1 +#endif // GGML_USE_CUBLAS +#include +#include +#include + +#ifdef LLAMA_SHARED +# if defined(_WIN32) && !defined(__MINGW32__) +# ifdef LLAMA_BUILD +# define LLAMA_API __declspec(dllexport) +# else +# define LLAMA_API __declspec(dllimport) +# endif +# else +# define LLAMA_API __attribute__ ((visibility ("default"))) +# endif +#else +# define LLAMA_API +#endif + +#define LLAMA_FILE_MAGIC_GGJT 0x67676a74u // 'ggjt' +#define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla' +#define LLAMA_FILE_MAGIC_GGMF 0x67676d66u // 'ggmf' +#define LLAMA_FILE_MAGIC_GGML 0x67676d6cu // 'ggml' +#define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn' + +#define LLAMA_FILE_VERSION 3 +#define LLAMA_FILE_MAGIC LLAMA_FILE_MAGIC_GGJT +#define LLAMA_FILE_MAGIC_UNVERSIONED LLAMA_FILE_MAGIC_GGML +#define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN +#define LLAMA_SESSION_VERSION 1 + +#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) +// Defined when llama.cpp is compiled with support for offloading model layers to GPU. +#define LLAMA_SUPPORTS_GPU_OFFLOAD +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + // + // C interface + // + // TODO: show sample usage + // + + struct llama_context; + + typedef int llama_token; + + typedef struct llama_token_data { + llama_token id; // token id + float logit; // log-odds of the token + float p; // probability of the token + } llama_token_data; + + typedef struct llama_token_data_array { + llama_token_data * data; + size_t size; + bool sorted; + } llama_token_data_array; + + typedef void (*llama_progress_callback)(float progress, void *ctx); + + struct llama_context_params { + int n_ctx; // text context + int n_batch; // prompt processing batch size + int n_gpu_layers; // number of layers to store in VRAM + int main_gpu; // the GPU that is used for scratch and small tensors + float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs + bool low_vram; // if true, reduce VRAM usage at the cost of performance + int seed; // RNG seed, -1 for random + + bool f16_kv; // use fp16 for KV cache + bool logits_all; // the llama_eval() call computes all logits, not just the last one + bool vocab_only; // only load the vocabulary, no weights + bool use_mmap; // use mmap if possible + bool use_mlock; // force system to keep model in RAM + bool embedding; // embedding mode only + + // called with a progress value between 0 and 1, pass NULL to disable + llama_progress_callback progress_callback; + // context pointer passed to the progress callback + void * progress_callback_user_data; + }; + + // model file types + enum llama_ftype { + LLAMA_FTYPE_ALL_F32 = 0, + LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors + LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors + LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors + LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16 + // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed + // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed + LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors + LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors + LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors + LLAMA_FTYPE_MOSTLY_Q2_K = 10,// except 1d tensors + LLAMA_FTYPE_MOSTLY_Q3_K_S = 11,// except 1d tensors + LLAMA_FTYPE_MOSTLY_Q3_K_M = 12,// except 1d tensors + LLAMA_FTYPE_MOSTLY_Q3_K_L = 13,// except 1d tensors + LLAMA_FTYPE_MOSTLY_Q4_K_S = 14,// except 1d tensors + LLAMA_FTYPE_MOSTLY_Q4_K_M = 15,// except 1d tensors + LLAMA_FTYPE_MOSTLY_Q5_K_S = 16,// except 1d tensors + LLAMA_FTYPE_MOSTLY_Q5_K_M = 17,// except 1d tensors + LLAMA_FTYPE_MOSTLY_Q6_K = 18,// except 1d tensors + }; + + // model quantization parameters + typedef struct llama_model_quantize_params { + int nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency() + enum llama_ftype ftype; // quantize to this llama_ftype + bool allow_requantize; // allow quantizing non-f32/f16 tensors + bool quantize_output_tensor; // quantize output.weight + } llama_model_quantize_params; + + LLAMA_API struct llama_context_params llama_context_default_params(); + LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params(); + + LLAMA_API bool llama_mmap_supported(); + LLAMA_API bool llama_mlock_supported(); + + // TODO: not great API - very likely to change + // Initialize the llama + ggml backend + // Call once at the start of the program + LLAMA_API void llama_init_backend(); + + LLAMA_API int64_t llama_time_us(); + + // Various functions for loading a ggml llama model. + // Allocate (almost) all memory needed for the model. + // Return NULL on failure + LLAMA_API struct llama_context * llama_init_from_file( + const char * path_model, + struct llama_context_params params); + + // Frees all allocated memory + LLAMA_API void llama_free(struct llama_context * ctx); + + // Returns 0 on success + LLAMA_API int llama_model_quantize( + const char * fname_inp, + const char * fname_out, + const llama_model_quantize_params * params); + + // Apply a LoRA adapter to a loaded model + // path_base_model is the path to a higher quality model to use as a base for + // the layers modified by the adapter. Can be NULL to use the current loaded model. + // The model needs to be reloaded before applying a new adapter, otherwise the adapter + // will be applied on top of the previous one + // Returns 0 on success + LLAMA_API int llama_apply_lora_from_file( + struct llama_context * ctx, + const char * path_lora, + const char * path_base_model, + int n_threads); + + // Returns the number of tokens in the KV cache + LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx); + + // Sets the current rng seed. + LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed); + + // Returns the maximum size in bytes of the state (rng, logits, embedding + // and kv_cache) - will often be smaller after compacting tokens + LLAMA_API size_t llama_get_state_size(const struct llama_context * ctx); + + // Copies the state to the specified destination address. + // Destination needs to have allocated enough memory. + // Returns the number of bytes copied + LLAMA_API size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst); + + // Set the state reading from the specified address + // Returns the number of bytes read + LLAMA_API size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src); + + // Save/load session file + LLAMA_API bool llama_load_session_file(struct llama_context * ctx, const char * path_session, llama_token * tokens_out, size_t n_token_capacity, size_t * n_token_count_out); + LLAMA_API bool llama_save_session_file(struct llama_context * ctx, const char * path_session, const llama_token * tokens, size_t n_token_count); + + // Run the llama inference to obtain the logits and probabilities for the next token. + // tokens + n_tokens is the provided batch of new tokens to process + // n_past is the number of tokens to use from previous eval calls + // Returns 0 on success + LLAMA_API int llama_eval( + struct llama_context * ctx, + const llama_token * tokens, + int n_tokens, + int n_past, + int n_threads); + + // Export a static computation graph for context of 511 and batch size of 1 + // NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these + // parameters here to keep things simple + // IMPORTANT: do not use for anything else other than debugging and testing! + LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname); + + // Convert the provided text into tokens. + // The tokens pointer must be large enough to hold the resulting tokens. + // Returns the number of tokens on success, no more than n_max_tokens + // Returns a negative number on failure - the number of tokens that would have been returned + // TODO: not sure if correct + LLAMA_API int llama_tokenize( + struct llama_context * ctx, + const char * text, + llama_token * tokens, + int n_max_tokens, + bool add_bos); + + LLAMA_API int llama_n_vocab(const struct llama_context * ctx); + LLAMA_API int llama_n_ctx (const struct llama_context * ctx); + LLAMA_API int llama_n_embd (const struct llama_context * ctx); + + // Get the vocabulary as output parameters. + // Returns number of results. + LLAMA_API int llama_get_vocab( + const struct llama_context * ctx, + const char * * strings, + float * scores, + int capacity); + + // Token logits obtained from the last call to llama_eval() + // The logits for the last token are stored in the last row + // Can be mutated in order to change the probabilities of the next token + // Rows: n_tokens + // Cols: n_vocab + LLAMA_API float * llama_get_logits(struct llama_context * ctx); + + // Get the embeddings for the input + // shape: [n_embd] (1-dimensional) + LLAMA_API float * llama_get_embeddings(struct llama_context * ctx); + + // Token Id -> String. Uses the vocabulary in the provided context + LLAMA_API const char * llama_token_to_str(const struct llama_context * ctx, llama_token token); + + // Special tokens + LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence + LLAMA_API llama_token llama_token_eos(); // end-of-sentence + LLAMA_API llama_token llama_token_nl(); // next-line + + // Sampling functions + + /// @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix. + LLAMA_API void llama_sample_repetition_penalty(struct llama_context * ctx, llama_token_data_array * candidates, const llama_token * last_tokens, size_t last_tokens_size, float penalty); + + /// @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. + LLAMA_API void llama_sample_frequency_and_presence_penalties(struct llama_context * ctx, llama_token_data_array * candidates, const llama_token * last_tokens, size_t last_tokens_size, float alpha_frequency, float alpha_presence); + + /// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits. + LLAMA_API void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates); + + /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep); + + /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep); + + /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/. + LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep); + + /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666. + LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep); + LLAMA_API void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array * candidates, float temp); + + /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. + /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. + /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. + /// @param eta The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. + /// @param m The number of tokens considered in the estimation of `s_hat`. This is an arbitrary value that is used to calculate `s_hat`, which in turn helps to calculate the value of `k`. In the paper, they use `m = 100`, but you can experiment with different values to see how it affects the performance of the algorithm. + /// @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal. + LLAMA_API llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int m, float * mu); + + /// @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. + /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. + /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. + /// @param eta The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. + /// @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal. + LLAMA_API llama_token llama_sample_token_mirostat_v2(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, float * mu); + + /// @details Selects the token with the highest probability. + LLAMA_API llama_token llama_sample_token_greedy(struct llama_context * ctx, llama_token_data_array * candidates); + + /// @details Randomly selects a token from the candidates based on their probabilities. + LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates); + + // Performance information + LLAMA_API void llama_print_timings(struct llama_context * ctx); + LLAMA_API void llama_reset_timings(struct llama_context * ctx); + + // Print system information + LLAMA_API const char * llama_print_system_info(void); + +#ifdef __cplusplus +} +#endif + +// Internal API to be implemented by llama.cpp and used by tests/benchmarks only +#ifdef LLAMA_API_INTERNAL + +#include +#include +struct ggml_tensor; + +std::vector>& llama_internal_get_tensor_map(struct llama_context * ctx); + +#endif + +#endif // LLAMA_H diff --git a/ctransformers/models/llm.cc b/ctransformers/models/llm.cc new file mode 100644 index 0000000000000000000000000000000000000000..20d496a095a09af5318a2f1e38238db10e8ce766 --- /dev/null +++ b/ctransformers/models/llm.cc @@ -0,0 +1,112 @@ +#include "llm.h" + +#include "llms/dolly.cc" +#include "llms/gpt-neox.cc" +#include "llms/gpt2.cc" +#include "llms/gptj.cc" +#include "llms/llama.cc" +#include "llms/mpt.cc" +#include "llms/replit.cc" +#include "llms/starcoder.cc" + +// Import falcon after llama. +#include "llms/falcon.cc" + +#ifdef __cplusplus +extern "C" { +#endif + +LLM* ctransformers_llm_create(const char* model_path, const char* model_type, + const int context_length, const int gpu_layers) { + std::string type = model_type; + // Remove non-alphanumeric characters from model type. + type.erase(std::remove_if(type.begin(), type.end(), + [](const char c) { return !std::isalnum(c); }), + type.end()); + + LLM* llm = nullptr; + if (type == "dollyv2") { + llm = new dollyv2_llm; + } else if (type == "falcon") { + llm = new falcon_llm; + } else if (type == "gpt2") { + llm = new gpt2_llm; + } else if (type == "gptj") { + llm = new gptj_llm; + } else if (type == "gptneox") { + llm = new gpt_neox_llm; + } else if (type == "llama") { + llm = new llama_llm; + } else if (type == "mpt") { + llm = new mpt_llm; + } else if (type == "replit") { + llm = new replit_llm; + } else if (type == "starcoder") { + llm = new starcoder_llm; + } + + if (llm == nullptr) { + fprintf(stderr, "Model type '%s' is not supported.\n", model_type); + return nullptr; + } + if (!llm->Init(model_path, context_length, gpu_layers)) { + delete llm; + return nullptr; + } + return llm; +} + +void ctransformers_llm_delete(LLM* llm) { delete llm; } + +int ctransformers_llm_tokenize(LLM* llm, const char* text, int* output) { + const std::vector tokens = llm->Tokenize(text); + std::copy(tokens.begin(), tokens.end(), output); + return tokens.size(); +} + +const char* ctransformers_llm_detokenize(LLM* llm, const int token) { + return llm->Detokenize(token).c_str(); +} + +bool ctransformers_llm_is_eos_token(LLM* llm, const int token) { + return llm->IsEosToken(token); +} + +int ctransformers_llm_eos_token_id(LLM* llm) { return llm->EosToken(); } + +int ctransformers_llm_vocab_size(LLM* llm) { return llm->VocabSize(); } + +int ctransformers_llm_context_length(LLM* llm) { return llm->ContextLength(); } + +bool ctransformers_llm_batch_eval(LLM* llm, const int* tokens, + const int n_tokens, const int batch_size, + const int threads) { + return llm->BatchEval(std::vector(tokens, tokens + n_tokens), + batch_size, threads); +} + +float* ctransformers_llm_logits_data(LLM* llm) { return llm->Logits().data(); } + +int ctransformers_llm_logits_size(LLM* llm) { return llm->Logits().size(); } + +const float* ctransformers_llm_embeddings_data(LLM* llm) { + return llm->Embeddings().data(); +} + +int ctransformers_llm_embeddings_size(LLM* llm) { + return llm->Embeddings().size(); +} + +int ctransformers_llm_sample(LLM* llm, const int top_k, const float top_p, + const float temperature, + const float repetition_penalty, + const int last_n_tokens, const int seed) { + return llm->Sample(top_k, top_p, temperature, repetition_penalty, + last_n_tokens, seed); +} + +void ctransformers_llm_reset(LLM* llm) { llm->Reset(); } + +#ifdef __cplusplus +} +#endif diff --git a/ctransformers/models/llm.h b/ctransformers/models/llm.h new file mode 100644 index 0000000000000000000000000000000000000000..4f61809fb1586e9fc04653c3146f4bd0142b101d --- /dev/null +++ b/ctransformers/models/llm.h @@ -0,0 +1,222 @@ +#ifndef CTRANSFORMERS_MODELS_LLM_H_ +#define CTRANSFORMERS_MODELS_LLM_H_ + +#include "common.h" + +// https://github.com/marella/train/blob/3c4ba1f59bf20e31f7ee5ea9a8f38e49440a93f7/train/state.py#L135-L175 +class RingBuffer { + public: + void Init(const int capacity) { + capacity_ = capacity; + Clear(); + } + + void Add(const gpt_vocab::id token) { + if (Size() < capacity_) { + tokens_.push_back(token); + } else { + tokens_[pos_] = token; + } + pos_ = (pos_ + 1) % capacity_; + } + + // Returns last n tokens. + std::unordered_set GetRecent(int n) const { + const int size = Size(); + n = std::min(size, n); + std::unordered_set result; + if (n == 0) { + return result; + } + const int start = (pos_ - n + size) % size; + if (start < pos_) { + result.insert(tokens_.begin() + start, tokens_.begin() + pos_); + } else { + result.insert(tokens_.begin() + start, tokens_.end()); + result.insert(tokens_.begin(), tokens_.begin() + pos_); + } + return result; + } + + void Clear() { + tokens_.clear(); + pos_ = 0; + } + + int Size() const { return tokens_.size(); } + + private: + int capacity_; + std::vector tokens_; + int pos_ = 0; +}; + +class LLM { + public: + virtual ~LLM(){}; + + bool Init(const std::string &filename, const int context_length, + const int gpu_layers) { + if (initialized_) { + return false; + } + if (!Load(filename, context_length, gpu_layers)) { + return false; + } + previous_tokens_.Init(ContextLength()); + return initialized_ = true; + } + + virtual std::vector Tokenize(const std::string &text) const { + return gpt_tokenize(vocab_, text); + } + + virtual const std::string &Detokenize(const gpt_vocab::id id) const { + const auto it = vocab_.id_to_token.find(id); + if (it == vocab_.id_to_token.end()) { + return kEmptyString; + } + return it->second; + } + + bool BatchEval(const std::vector &tokens, int batch_size, + const int threads) { + batch_size = std::min(ContextLength(), batch_size); + const int size = tokens.size(); + for (int start = 0; start < size; start += batch_size) { + const int end = std::min(start + batch_size, (int)tokens.size()); + const std::vector batch(tokens.begin() + start, + tokens.begin() + end); + if (!EvalInternal(batch, threads)) { + return false; + } + } + return true; + } + + virtual std::vector &Logits() { return logits_; } + + virtual const std::vector &Embeddings() const { return embeddings_; } + + virtual gpt_vocab::id Sample(const int top_k, const float top_p, + const float temperature, + const float repetition_penalty, + int last_n_tokens, int seed) const { + if (logits_.empty()) { + return EosToken(); + } + if (last_n_tokens < 0) { + last_n_tokens = ContextLength(); + } + if (seed < 0) { + seed = time(nullptr); + } + std::mt19937 rng(seed); + + std::unordered_set recent_tokens; + if (repetition_penalty != 1.0f) { + recent_tokens = previous_tokens_.GetRecent(last_n_tokens); + } + + return gpt_sample_top_k_top_p( + vocab_, logits_.data() + (logits_.size() - VocabSize()), top_k, top_p, + temperature, repetition_penalty, recent_tokens, rng); + } + + virtual bool IsEosToken(const gpt_vocab::id token) const { + if (token == EosToken()) { + return true; + } + // Handle special tokens in StarChat and Dolly V2. + if (!vocab_.special_tokens.empty()) { + const std::string &text = Detokenize(token); + return text == "<|end|>" || text == "### End"; + } + return false; + } + + virtual gpt_vocab::id EosToken() const { + const auto it = vocab_.token_to_id.find("<|endoftext|>"); + if (it != vocab_.token_to_id.end()) { + return it->second; + } + return 0; + } + + virtual int VocabSize() const { return vocab_.id_to_token.size(); } + + int ContextLength() const { return n_ctx_; } + + void Reset() { + logits_.clear(); + previous_tokens_.Clear(); + } + + protected: + const std::string kEmptyString = ""; + int n_ctx_ = -1; + gpt_vocab vocab_; + size_t mem_per_token_ = 0; + std::vector logits_; + std::vector embeddings_; + RingBuffer previous_tokens_; + + virtual bool Load(const std::string &filename, const int context_length, + const int gpu_layers) = 0; + + virtual bool Eval(const std::vector &tokens, const int threads, + const int n_past) = 0; + + private: + bool initialized_ = false; + + bool EvalInternal(const std::vector &tokens, int threads) { + if (threads < 0) { + threads = std::min((int)std::thread::hardware_concurrency(), 4); + } + threads = std::max(threads, 1); + const int n_past = + std::min(ContextLength() - (int)tokens.size(), previous_tokens_.Size()); + if (!Eval(tokens, threads, n_past)) { + return false; + } + for (const gpt_vocab::id token : tokens) { + previous_tokens_.Add(token); + } + return true; + } +}; + +#define REGISTER_LLM(_name) \ + class _name##_llm : public LLM { \ + public: \ + virtual ~_name##_llm() { \ + if (model_.ctx != nullptr) { \ + ggml_free(model_.ctx); \ + } \ + } \ + \ + protected: \ + bool Load(const std::string &filename, const int context_length, \ + const int gpu_layers) override { \ + if (context_length > 0) { \ + model_.hparams.n_ctx = context_length; \ + } \ + if (!_name##_model_load(filename, model_, vocab_)) { \ + return false; \ + } \ + n_ctx_ = model_.hparams.n_ctx; \ + return true; \ + } \ + \ + bool Eval(const std::vector &tokens, const int threads, \ + const int n_past) override { \ + return _name##_eval(model_, threads, n_past, tokens, logits_, \ + mem_per_token_); \ + } \ + \ + private: \ + _name##_model model_; \ + } + +#endif diff --git a/ctransformers/models/llms/dolly.cc b/ctransformers/models/llms/dolly.cc new file mode 100644 index 0000000000000000000000000000000000000000..c3fa492c71bf2034674f8dc8c49d94d3350e1d3c --- /dev/null +++ b/ctransformers/models/llms/dolly.cc @@ -0,0 +1,651 @@ +#include "llm.h" + +// https://github.com/ggerganov/ggml/blob/master/examples/dolly-v2/main.cpp + +// default hparams (Dolly-V2 3B) +struct dollyv2_hparams { + int32_t n_vocab = 50254; // tokenizer.vocab_size + int32_t n_ctx = 2048; // model.config.max_position_embeddings + int32_t n_embd = 2560; // model.config.hidden_size + int32_t n_head = 32; // model.config.num_attention_heads + int32_t n_layer = 32; // model.config.num_hidden_layers + int32_t n_rot = 20; // rotary_pct[25%] * (n_embd / n_head) + int32_t par_res = 1; // 1 = true, 0 = false + int32_t ftype = GGML_FTYPE_MOSTLY_F16; +}; + +struct dollyv2_layer { + // pre normalization + struct ggml_tensor *ln_1_g; + struct ggml_tensor *ln_1_b; + + // attention + struct ggml_tensor *c_attn_attn_w; + struct ggml_tensor *c_attn_attn_b; + + struct ggml_tensor *c_attn_proj_w; + struct ggml_tensor *c_attn_proj_b; + + // post normalization + struct ggml_tensor *ln_2_g; + struct ggml_tensor *ln_2_b; + + // ff + struct ggml_tensor *c_mlp_fc_w; + struct ggml_tensor *c_mlp_fc_b; + + struct ggml_tensor *c_mlp_proj_w; + struct ggml_tensor *c_mlp_proj_b; +}; + +struct dollyv2_model { + dollyv2_hparams hparams; + + // normalization + struct ggml_tensor *ln_f_g; + struct ggml_tensor *ln_f_b; + + struct ggml_tensor *wte; // position embedding + + struct ggml_tensor *lmh_g; // language model head + // struct ggml_tensor * lmh_b; // language model bias + + std::vector layers; + + // key + value memory + struct ggml_tensor *memory_k; + struct ggml_tensor *memory_v; + + // + struct ggml_context *ctx; + std::map tensors; +}; + +// load the model's weights from a file +bool dollyv2_model_load(const std::string &fname, dollyv2_model &model, + gpt_vocab &vocab) { + auto fin = std::ifstream(fname, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != 0x67676d6c) { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, + fname.c_str()); + return false; + } + } + + // load hparams + { + auto &hparams = model.hparams; + + fin.read((char *)&hparams.n_vocab, sizeof(hparams.n_vocab)); + fin.read((char *)&hparams.n_ctx, sizeof(hparams.n_ctx)); + fin.read((char *)&hparams.n_embd, sizeof(hparams.n_embd)); + fin.read((char *)&hparams.n_head, sizeof(hparams.n_head)); + fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer)); + fin.read((char *)&hparams.n_rot, sizeof(hparams.n_rot)); + fin.read((char *)&hparams.par_res, sizeof(hparams.par_res)); + fin.read((char *)&hparams.ftype, sizeof(hparams.ftype)); + + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + + hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + + // load vocab + { + const int32_t n_vocab = model.hparams.n_vocab; + + std::string word; + std::vector buf(128); + + for (int i = 0; i < n_vocab; i++) { + uint32_t len; + fin.read((char *)&len, sizeof(len)); + + buf.resize(len); + fin.read((char *)buf.data(), len); + word.assign(buf.data(), len); + + vocab.token_to_id[word] = i; + vocab.id_to_token[i] = word; + } + + vocab.add_special_token("### End"); + vocab.add_special_token("### Instruction:"); + vocab.add_special_token("### Response:"); + } + + // for the big tensors, we have the option to store the data in 16-bit floats + // or quantized in order to save memory and also to speed up the computation + ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); + if (wtype == GGML_TYPE_COUNT) { + fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", + __func__, fname.c_str(), model.hparams.ftype); + return false; + } + + auto &ctx = model.ctx; + + size_t ctx_size = 0; + + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_vocab = hparams.n_vocab; + + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_g + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_b + + ctx_size += n_embd * n_vocab * ggml_type_sizef(wtype); // wte + + ctx_size += n_embd * n_vocab * ggml_type_sizef(wtype); // lmh_g + // ctx_size += n_vocab*ggml_type_sizef(GGML_TYPE_F32); // lmh_b + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_b + + ctx_size += n_layer * (3 * n_embd * n_embd * + ggml_type_sizef(wtype)); // c_attn_attn_w + ctx_size += n_layer * + (3 * n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_attn_attn_b + + ctx_size += + n_layer * (n_embd * n_embd * ggml_type_sizef(wtype)); // c_attn_proj_w + ctx_size += n_layer * (n_embd * n_embd * + ggml_type_sizef(GGML_TYPE_F32)); // c_attn_proj_b + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_b + + ctx_size += + n_layer * (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_fc_w + ctx_size += + n_layer * (4 * n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_fc_b + + ctx_size += n_layer * + (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_proj_w + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b + + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F32); // memory_k + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F32); // memory_v + + ctx_size += (6 + 16 * n_layer) * 512; // object overhead + } + + // create the ggml context + { + struct ggml_init_params params = { + /*.mem_size =*/ctx_size, + /*.mem_buffer =*/NULL, + /*.no_alloc =*/false, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + + // prepare memory for the weights + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_vocab = hparams.n_vocab; + + model.layers.resize(n_layer); + + model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + + model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + model.lmh_g = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + // model.lmh_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_vocab); + + // map by name + model.tensors["gpt_neox.embed_in.weight"] = model.wte; + + model.tensors["gpt_neox.final_layer_norm.weight"] = model.ln_f_g; + model.tensors["gpt_neox.final_layer_norm.bias"] = model.ln_f_b; + + model.tensors["embed_out.weight"] = model.lmh_g; + // model.tensors["lm_head.bias"] = model.lmh_b; + + for (int i = 0; i < n_layer; ++i) { + auto &layer = model.layers[i]; + + layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_attn_attn_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 3 * n_embd); + layer.c_attn_attn_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3 * n_embd); + + layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4 * n_embd); + layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4 * n_embd); + + layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4 * n_embd, n_embd); + layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + // map by name + + // unmapped: attention.rotary_emb, mlp.act + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".input_layernorm.weight"] = layer.ln_1_g; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".input_layernorm.bias"] = layer.ln_1_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".attention.query_key_value.weight"] = layer.c_attn_attn_w; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".attention.query_key_value.bias"] = layer.c_attn_attn_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".attention.dense.weight"] = layer.c_attn_proj_w; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".attention.dense.bias"] = layer.c_attn_proj_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".post_attention_layernorm.weight"] = layer.ln_2_g; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".post_attention_layernorm.bias"] = layer.ln_2_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".mlp.dense_h_to_4h.weight"] = layer.c_mlp_fc_w; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".mlp.dense_h_to_4h.bias"] = layer.c_mlp_fc_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".mlp.dense_4h_to_h.weight"] = layer.c_mlp_proj_w; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".mlp.dense_4h_to_h.bias"] = layer.c_mlp_proj_b; + } + } + + // key + value memory + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + + const int64_t n_mem = n_layer * n_ctx; + const int64_t n_elements = n_embd * n_mem; + + model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + + const size_t memory_size = + ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); + } + + // load weights + { + int n_tensors = 0; + size_t total_size = 0; + + while (true) { + int32_t n_dims; + int32_t length; + int32_t ttype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ttype), sizeof(ttype)); + + if (fin.eof()) { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + fin.read(&name[0], length); + + if (model.tensors.find(name.data()) == model.tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, + name.data()); + return false; + } + + auto tensor = model.tensors[name.data()]; + if (ggml_nelements(tensor) != nelements) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", + __func__, name.data()); + return false; + } + + if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { + fprintf(stderr, + "%s: tensor '%s' has wrong shape in model file: got [%5d, " + "%5d], expected [%5d, %5d]\n", + __func__, name.data(), (int)tensor->ne[0], (int)tensor->ne[1], + ne[0], ne[1]); + return false; + } + + // for debugging + if (0) { + printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", + name.data(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), + ggml_nbytes(tensor) / 1024.0 / 1024.0, ggml_nbytes(tensor)); + } + + const size_t bpe = ggml_type_size(ggml_type(ttype)); + + if ((nelements * bpe) / ggml_blck_size(tensor->type) != + ggml_nbytes(tensor)) { + fprintf(stderr, + "%s: tensor '%s' has wrong size in model file: got %zu, " + "expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor), nelements * bpe); + return false; + } + + fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + + total_size += ggml_nbytes(tensor); + } + } + + fin.close(); + + return true; +} + +// feed-forward network +ggml_tensor *gpt_neox_ff(const dollyv2_layer &layer, ggml_context *ctx0, + ggml_tensor *inp) { + ggml_tensor *cur = ggml_norm(ctx0, inp); + + cur = + ggml_add(ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, layer.ln_2_g, cur), cur), + ggml_repeat(ctx0, layer.ln_2_b, cur)); + + cur = ggml_mul_mat(ctx0, layer.c_mlp_fc_w, cur); + + cur = ggml_add(ctx0, ggml_repeat(ctx0, layer.c_mlp_fc_b, cur), cur); + + // GELU activation + cur = ggml_gelu(ctx0, cur); + + // projection + // cur = proj_w*cur + proj_b + cur = ggml_mul_mat(ctx0, layer.c_mlp_proj_w, cur); + + cur = ggml_add(ctx0, ggml_repeat(ctx0, layer.c_mlp_proj_b, cur), cur); + return cur; +} + +// evaluate the transformer +// +// - model: the model +// - n_threads: number of threads to use +// - n_past: the context size so far +// - embd_inp: the embeddings of the tokens in the context +// - embd_w: the predicted logits for the next token +// +bool dollyv2_eval(const dollyv2_model &model, const int n_threads, + const int n_past, const std::vector &embd_inp, + std::vector &embd_w, size_t &mem_per_token) { + const int N = embd_inp.size(); + + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_head = hparams.n_head; + const int n_vocab = hparams.n_vocab; + const int n_rot = hparams.n_rot; + + static size_t buf_size = 256u * 1024 * 1024; + static void *buf = malloc(buf_size); + + if (mem_per_token > 0 && mem_per_token * N > buf_size) { + const size_t buf_size_new = + 1.1 * + (mem_per_token * N); // add 10% to account for ggml object overhead + // printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, + // buf_size, buf_size_new); + + // reallocate + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } + } + + struct ggml_init_params params = { + /*.mem_size =*/buf_size, + /*.mem_buffer =*/buf, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + struct ggml_cgraph gf = {}; + gf.n_threads = n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd)); + + // wte + struct ggml_tensor *inpL = ggml_get_rows(ctx0, model.wte, embd); + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor *cur; + + // self-attention + { + { + cur = ggml_norm(ctx0, inpL); + + cur = ggml_add( + ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_1_g, cur), + cur), + ggml_repeat(ctx0, model.layers[il].ln_1_b, cur)); + } + + // compute QKV + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_attn_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_attn_b, cur), cur); + } + + struct ggml_tensor *Qcur = + ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd / n_head, n_head, N, + cur->nb[1] / n_head, cur->nb[1], + 0 * sizeof(float) * n_embd / n_head)); + struct ggml_tensor *Kcur = + ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd / n_head, n_head, N, + cur->nb[1] / n_head, cur->nb[1], + 1 * sizeof(float) * n_embd / n_head)); + struct ggml_tensor *Vcur = + ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd / n_head, n_head, N, + cur->nb[1] / n_head, cur->nb[1], + 2 * sizeof(float) * n_embd / n_head)); + + // using mode = 2 for GPT-NeoX mode + Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, n_rot, 2); + Kcur = ggml_rope_inplace(ctx0, Kcur, n_past, n_rot, 2); + + // store key and value to memory + { + Vcur = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, Vcur, n_embd, N)); + + struct ggml_tensor *k = + ggml_view_1d(ctx0, model.memory_k, N * n_embd, + (ggml_element_size(model.memory_k) * n_embd) * + (il * n_ctx + n_past)); + struct ggml_tensor *v = ggml_view_2d( + ctx0, model.memory_v, N, n_embd, + (n_ctx)*ggml_element_size(model.memory_v), + (il * n_ctx) * ggml_element_size(model.memory_v) * n_embd + + n_past * ggml_element_size(model.memory_v)); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, + // 3) + struct ggml_tensor *Q = ggml_permute(ctx0, Qcur, 0, 2, 1, 3); + + // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) + struct ggml_tensor *K = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_k, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_k) * n_embd), + n_embd / n_head, n_head, n_past + N), + 0, 2, 1, 3); + + // K * Q + struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + struct ggml_tensor *KQ_scaled = ggml_scale_inplace( + ctx0, KQ, ggml_new_f32(ctx0, 1.0f / sqrt(float(n_embd) / n_head))); + + // KQ_masked = mask_past(KQ_scaled) + struct ggml_tensor *KQ_masked = + ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); + + // KQ = soft_max(KQ_masked) + struct ggml_tensor *KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, + // 3).contiguous() + struct ggml_tensor *V = ggml_view_3d( + ctx0, model.memory_v, n_past + N, n_embd / n_head, n_head, + n_ctx * ggml_element_size(model.memory_v), + n_ctx * ggml_element_size(model.memory_v) * n_embd / n_head, + il * n_ctx * ggml_element_size(model.memory_v) * n_embd); + + // KQV = transpose(V) * KQ_soft_max + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + // cur = KQV_merged.contiguous().view(n_embd, N) + cur = ggml_cpy(ctx0, KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_proj_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_proj_b, cur), cur); + } + } + + if (hparams.par_res == 0) { + struct ggml_tensor *inpFF = ggml_add(ctx0, cur, inpL); + + cur = gpt_neox_ff(model.layers[il], ctx0, inpFF); + + // input for next layer + inpL = ggml_add(ctx0, cur, inpFF); + } else { + struct ggml_tensor *inpFF = cur; + + // this is independent of the self-attention result, so it could be done + // in parallel to the self-attention note here we pass inpL instead of cur + cur = gpt_neox_ff(model.layers[il], ctx0, inpL); + + // layer input + FF + cur = ggml_add(ctx0, cur, inpFF); + + // input for next layer + inpL = ggml_add(ctx0, cur, inpL); + } + } + + // norm + { + inpL = ggml_norm(ctx0, inpL); + + // inpL = ln_f_g*inpL + ln_f_b + inpL = ggml_add(ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.ln_f_g, inpL), inpL), + ggml_repeat(ctx0, model.ln_f_b, inpL)); + } + + // lm_head + { + inpL = ggml_mul_mat(ctx0, model.lmh_g, inpL); + + // inpL = ggml_add(ctx0, + // ggml_repeat(ctx0, model.lmh_b, inpL), + // inpL); + } + + // logits -> probs + // inpL = ggml_soft_max_inplace(ctx0, inpL); + + // run the computation + ggml_build_forward_expand(&gf, inpL); + ggml_graph_compute(ctx0, &gf); + + // if (n_past%100 == 0) { + // ggml_graph_print (&gf); + // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); + //} + + // embd_w.resize(n_vocab*N); + // memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); + + // return result for just the last token + embd_w.resize(n_vocab); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL) + (n_vocab * (N - 1)), + sizeof(float) * n_vocab); + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0) / N; + } + // printf("used_mem = %zu\n", ggml_used_mem(ctx0)); + + ggml_free(ctx0); + + return true; +} + +REGISTER_LLM(dollyv2); diff --git a/ctransformers/models/llms/falcon.cc b/ctransformers/models/llms/falcon.cc new file mode 100644 index 0000000000000000000000000000000000000000..d99bef47e8ccb2df3fb8c00f941391eebe98ac7c --- /dev/null +++ b/ctransformers/models/llms/falcon.cc @@ -0,0 +1,112 @@ +#include "llm.h" + +// https://github.com/cmp-nct/ggllm.cpp/blob/master/examples/falcon/falcon_main.cpp + +#include "ggml/libfalcon.cpp" + +class falcon_llm : public LLM { + public: + virtual ~falcon_llm() { + if (ctx_ != nullptr) { + falcon_free(ctx_); + } + } + + std::vector Tokenize(const std::string &text) const override { + return falcon_tokenize(ctx_->vocab, text, /*bos=*/false); + } + + const std::string &Detokenize(const gpt_vocab::id id) const override { + if (id >= falcon_n_vocab(ctx_)) { + return kEmptyString; + } + return ctx_->vocab.id_to_token[id].tok; + } + + bool IsEosToken(const gpt_vocab::id token) const override { + return token == EosToken(); + } + + gpt_vocab::id EosToken() const override { return falcon_token_eos(); } + + int VocabSize() const override { return falcon_n_vocab(ctx_); } + + std::vector &Logits() override { return ctx_->logits; } + + const std::vector &Embeddings() const override { + return ctx_->embedding; + } + + gpt_vocab::id Sample(const int top_k, const float top_p, + const float temperature, const float repetition_penalty, + int last_n_tokens, int seed) const override { + if (last_n_tokens < 0) { + last_n_tokens = ContextLength(); + } + if (seed < 0) { + seed = time(nullptr); + } + ctx_->rng.seed(seed); + + const float *logits = falcon_get_logits(ctx_); + const int n_vocab = falcon_n_vocab(ctx_); + + std::vector candidates; + candidates.reserve(n_vocab); + for (llama_token token_id = 0; token_id < n_vocab; token_id++) { + candidates.emplace_back( + llama_token_data{token_id, logits[token_id], 0.0f}); + } + + llama_token_data_array candidates_p = { + candidates.data(), + candidates.size(), + false, + }; + + { + std::unordered_set recent_tokens_set; + if (repetition_penalty != 1.0f) { + recent_tokens_set = previous_tokens_.GetRecent(last_n_tokens); + } + std::vector recent_tokens(recent_tokens_set.begin(), + recent_tokens_set.end()); + falcon_sample_repetition_penalty( + ctx_, &candidates_p, recent_tokens.data(), recent_tokens.size(), + repetition_penalty); + } + + falcon_sample_top_k(ctx_, &candidates_p, top_k, 1); + falcon_sample_top_p(ctx_, &candidates_p, top_p, 1); + falcon_sample_temperature(ctx_, &candidates_p, temperature); + return falcon_sample_token(ctx_, &candidates_p); + } + + protected: + bool Load(const std::string &filename, const int context_length, + const int gpu_layers) override { + falcon_context_params params = falcon_context_default_params(); + params.embedding = true; + if (context_length > 0) { + params.n_ctx = context_length; + } + params.n_gpu_layers = gpu_layers; + + ctx_ = falcon_init_from_file(filename.c_str(), params); + if (ctx_ == nullptr) { + return false; + } + n_ctx_ = falcon_n_ctx(ctx_); + return true; + } + + bool Eval(const std::vector &tokens, const int threads, + const int n_past) override { + const int status = falcon_eval(ctx_, tokens.data(), tokens.size(), n_past, + threads, /*debug_timings=*/0); + return status == 0; + } + + private: + falcon_context *ctx_ = nullptr; +}; diff --git a/ctransformers/models/llms/gpt-neox.cc b/ctransformers/models/llms/gpt-neox.cc new file mode 100644 index 0000000000000000000000000000000000000000..ca25ec0e62f25ccaf08172058a841fa72cfd0f82 --- /dev/null +++ b/ctransformers/models/llms/gpt-neox.cc @@ -0,0 +1,676 @@ +#include "llm.h" + +// https://github.com/ggerganov/ggml/blob/master/examples/gpt-neox/main.cpp + +// default hparams (StableLM 3B) +struct gpt_neox_hparams { + int32_t n_vocab = 50257; + int32_t n_ctx = 4096; + int32_t n_embd = 4096; + int32_t n_head = 32; + int32_t n_layer = 16; + int32_t n_rot = 32; // rotary_pct * (n_embd / n_head) + int32_t par_res = 1; // 1 = true, 0 = false + int32_t ftype = 1; +}; + +struct gpt_neox_layer { + // pre normalization + struct ggml_tensor *ln_1_g; + struct ggml_tensor *ln_1_b; + + // attention + struct ggml_tensor *c_attn_attn_w; + struct ggml_tensor *c_attn_attn_b; + + struct ggml_tensor *c_attn_proj_w; + struct ggml_tensor *c_attn_proj_b; + + // post normalization + struct ggml_tensor *ln_2_g; + struct ggml_tensor *ln_2_b; + + // ff + struct ggml_tensor *c_mlp_fc_w; + struct ggml_tensor *c_mlp_fc_b; + + struct ggml_tensor *c_mlp_proj_w; + struct ggml_tensor *c_mlp_proj_b; +}; + +struct gpt_neox_model { + gpt_neox_hparams hparams; + + // normalization + struct ggml_tensor *ln_f_g; + struct ggml_tensor *ln_f_b; + + struct ggml_tensor *wte; // position embedding + + struct ggml_tensor *lmh_g; // language model head + // struct ggml_tensor * lmh_b; // language model bias + + std::vector layers; + + // key + value memory + struct ggml_tensor *memory_k; + struct ggml_tensor *memory_v; + + // + struct ggml_context *ctx; + std::map tensors; +}; + +// load the model's weights from a file +bool gpt_neox_model_load(const std::string &fname, gpt_neox_model &model, + gpt_vocab &vocab) { + auto fin = std::ifstream(fname, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != 0x67676d6c) { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, + fname.c_str()); + return false; + } + } + + // load hparams + { + auto &hparams = model.hparams; + + fin.read((char *)&hparams.n_vocab, sizeof(hparams.n_vocab)); + fin.read((char *)&hparams.n_ctx, sizeof(hparams.n_ctx)); + fin.read((char *)&hparams.n_embd, sizeof(hparams.n_embd)); + fin.read((char *)&hparams.n_head, sizeof(hparams.n_head)); + fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer)); + fin.read((char *)&hparams.n_rot, sizeof(hparams.n_rot)); + fin.read((char *)&hparams.par_res, sizeof(hparams.par_res)); + fin.read((char *)&hparams.ftype, sizeof(hparams.ftype)); + + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + + hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + + // load vocab + { + const int32_t n_vocab = model.hparams.n_vocab; + + std::string word; + std::vector buf(128); + + for (int i = 0; i < n_vocab; i++) { + uint32_t len; + fin.read((char *)&len, sizeof(len)); + + buf.resize(len); + fin.read((char *)buf.data(), len); + word.assign(buf.data(), len); + + vocab.token_to_id[word] = i; + vocab.id_to_token[i] = word; + } + } + + // for the big tensors, we have the option to store the data in 16-bit floats + // or quantized in order to save memory and also to speed up the computation + ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); + if (wtype == GGML_TYPE_COUNT) { + fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", + __func__, fname.c_str(), model.hparams.ftype); + return false; + } + + auto &ctx = model.ctx; + + size_t ctx_size = 0; + + { + const auto &hparams = model.hparams; + + const size_t n_embd = hparams.n_embd; + const size_t n_layer = hparams.n_layer; + const size_t n_ctx = hparams.n_ctx; + const size_t n_vocab = hparams.n_vocab; + + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_g + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_b + + ctx_size += n_embd * n_vocab * ggml_type_sizef(wtype); // wte + + ctx_size += n_embd * n_vocab * ggml_type_sizef(wtype); // lmh_g + // ctx_size += n_vocab*ggml_type_sizef(GGML_TYPE_F32); // lmh_b + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_b + + ctx_size += n_layer * (3 * n_embd * n_embd * + ggml_type_sizef(wtype)); // c_attn_attn_w + ctx_size += n_layer * + (3 * n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_attn_attn_b + + ctx_size += + n_layer * (n_embd * n_embd * ggml_type_sizef(wtype)); // c_attn_proj_w + ctx_size += n_layer * (n_embd * n_embd * + ggml_type_sizef(GGML_TYPE_F32)); // c_attn_proj_b + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_b + + ctx_size += + n_layer * (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_fc_w + ctx_size += + n_layer * (4 * n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_fc_b + + ctx_size += n_layer * + (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_proj_w + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b + + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F32); // memory_k + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F32); // memory_v + + ctx_size += (6 + 16 * n_layer) * 1024; // object overhead + } + + // create the ggml context + { + struct ggml_init_params params = { + /*.mem_size =*/ctx_size, + /*.mem_buffer =*/NULL, + /*.no_alloc =*/false, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + + // prepare memory for the weights + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_vocab = hparams.n_vocab; + + model.layers.resize(n_layer); + + model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + + model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + model.lmh_g = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + // model.lmh_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_vocab); + + // map by name + model.tensors["gpt_neox.embed_in.weight"] = model.wte; + + model.tensors["gpt_neox.final_layer_norm.weight"] = model.ln_f_g; + model.tensors["gpt_neox.final_layer_norm.bias"] = model.ln_f_b; + + model.tensors["embed_out.weight"] = model.lmh_g; + // model.tensors["lm_head.bias"] = model.lmh_b; + + for (int i = 0; i < n_layer; ++i) { + auto &layer = model.layers[i]; + + layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_attn_attn_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 3 * n_embd); + layer.c_attn_attn_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3 * n_embd); + + layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4 * n_embd); + layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4 * n_embd); + + layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4 * n_embd, n_embd); + layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + // map by name + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".input_layernorm.weight"] = layer.ln_1_g; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".input_layernorm.bias"] = layer.ln_1_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".attention.query_key_value.weight"] = layer.c_attn_attn_w; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".attention.query_key_value.bias"] = layer.c_attn_attn_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".attention.dense.weight"] = layer.c_attn_proj_w; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".attention.dense.bias"] = layer.c_attn_proj_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".post_attention_layernorm.weight"] = layer.ln_2_g; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".post_attention_layernorm.bias"] = layer.ln_2_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".mlp.dense_h_to_4h.weight"] = layer.c_mlp_fc_w; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".mlp.dense_h_to_4h.bias"] = layer.c_mlp_fc_b; + + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".mlp.dense_4h_to_h.weight"] = layer.c_mlp_proj_w; + model.tensors["gpt_neox.layers." + std::to_string(i) + + ".mlp.dense_4h_to_h.bias"] = layer.c_mlp_proj_b; + } + } + + // key + value memory + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + + const int64_t n_mem = n_layer * n_ctx; + const int64_t n_elements = n_embd * n_mem; + + model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + + const size_t memory_size = + ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); + } + + // load weights + { + int n_tensors = 0; + size_t total_size = 0; + + while (true) { + int32_t n_dims; + int32_t length; + int32_t ttype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ttype), sizeof(ttype)); + + if (fin.eof()) { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + fin.read(&name[0], length); + + if (model.tensors.find(name.data()) == model.tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, + name.data()); + return false; + } + + auto tensor = model.tensors[name.data()]; + if (ggml_nelements(tensor) != nelements) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", + __func__, name.data()); + return false; + } + + if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { + fprintf(stderr, + "%s: tensor '%s' has wrong shape in model file: got [%5d, " + "%5d], expected [%5d, %5d]\n", + __func__, name.data(), (int)tensor->ne[0], (int)tensor->ne[1], + ne[0], ne[1]); + return false; + } + + // for debugging + if (0) { + printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", + name.data(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), + ggml_nbytes(tensor) / 1024.0 / 1024.0, ggml_nbytes(tensor)); + } + + const size_t bpe = ggml_type_size(ggml_type(ttype)); + + if ((nelements * bpe) / ggml_blck_size(tensor->type) != + ggml_nbytes(tensor)) { + fprintf(stderr, + "%s: tensor '%s' has wrong size in model file: got %zu, " + "expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor), nelements * bpe); + return false; + } + + fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + + total_size += ggml_nbytes(tensor); + } + } + + fin.close(); + + return true; +} + +// feed-forward network +ggml_tensor *gpt_neox_ff(const gpt_neox_layer &layer, ggml_context *ctx0, + ggml_tensor *inp) { + ggml_tensor *cur = ggml_norm(ctx0, inp); + + cur = + ggml_add(ctx0, ggml_mul(ctx0, ggml_repeat(ctx0, layer.ln_2_g, cur), cur), + ggml_repeat(ctx0, layer.ln_2_b, cur)); + + cur = ggml_mul_mat(ctx0, layer.c_mlp_fc_w, cur); + + cur = ggml_add(ctx0, ggml_repeat(ctx0, layer.c_mlp_fc_b, cur), cur); + + // GELU activation + cur = ggml_gelu(ctx0, cur); + + // projection + // cur = proj_w*cur + proj_b + cur = ggml_mul_mat(ctx0, layer.c_mlp_proj_w, cur); + + cur = ggml_add(ctx0, ggml_repeat(ctx0, layer.c_mlp_proj_b, cur), cur); + return cur; +} + +// evaluate the transformer +// +// - model: the model +// - n_threads: number of threads to use +// - n_past: the context size so far +// - embd_inp: the embeddings of the tokens in the context +// - embd_w: the predicted logits for the next token +// +bool gpt_neox_eval(const gpt_neox_model &model, const int n_threads, + const int n_past, const std::vector &embd_inp, + std::vector &embd_w, size_t &mem_per_token) { + const int N = embd_inp.size(); + + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_head = hparams.n_head; + const int n_vocab = hparams.n_vocab; + const int n_rot = hparams.n_rot; + + static size_t buf_size = 256u * 1024 * 1024; + static void *buf = malloc(buf_size); + + // use 2 scratch buffers + // TODO: very hacky solution - reimplement in a more elegant way + static size_t scr0_size = 256u * 1024 * 1024; + static void *scr0 = malloc(scr0_size); + + static size_t scr1_size = 256u * 1024 * 1024; + static void *scr1 = malloc(scr1_size); + + if (mem_per_token > 0 && mem_per_token * N > buf_size) { + const size_t buf_size_new = + 1.1 * + (mem_per_token * N); // add 10% to account for ggml object overhead + // printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, + // buf_size, buf_size_new); + + // reallocate + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } + } + + struct ggml_init_params params = { + /*.mem_size =*/buf_size, + /*.mem_buffer =*/buf, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + struct ggml_cgraph gf = {}; + gf.n_threads = n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd)); + + // wte + struct ggml_tensor *inpL = ggml_get_rows(ctx0, model.wte, embd); + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor *cur; + + ggml_set_scratch(ctx0, { + 0, + scr0_size, + scr0, + }); + + // self-attention + { + { + cur = ggml_norm(ctx0, inpL); + + cur = ggml_add( + ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_1_g, cur), + cur), + ggml_repeat(ctx0, model.layers[il].ln_1_b, cur)); + } + + // compute QKV + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_attn_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_attn_b, cur), cur); + } + + struct ggml_tensor *Qcur = + ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd / n_head, n_head, N, + cur->nb[1] / n_head, cur->nb[1], + 0 * sizeof(float) * n_embd / n_head)); + struct ggml_tensor *Kcur = + ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd / n_head, n_head, N, + cur->nb[1] / n_head, cur->nb[1], + 1 * sizeof(float) * n_embd / n_head)); + struct ggml_tensor *Vcur = + ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd / n_head, n_head, N, + cur->nb[1] / n_head, cur->nb[1], + 2 * sizeof(float) * n_embd / n_head)); + + // using mode = 2 for GPT-NeoX mode + Qcur = ggml_rope_inplace(ctx0, Qcur, n_past, n_rot, 2); + Kcur = ggml_rope_inplace(ctx0, Kcur, n_past, n_rot, 2); + + // store key and value to memory + { + Vcur = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, Vcur, n_embd, N)); + + struct ggml_tensor *k = + ggml_view_1d(ctx0, model.memory_k, N * n_embd, + (ggml_element_size(model.memory_k) * n_embd) * + (il * n_ctx + n_past)); + struct ggml_tensor *v = ggml_view_2d( + ctx0, model.memory_v, N, n_embd, + (n_ctx)*ggml_element_size(model.memory_v), + (il * n_ctx) * ggml_element_size(model.memory_v) * n_embd + + n_past * ggml_element_size(model.memory_v)); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, + // 3) + struct ggml_tensor *Q = ggml_permute(ctx0, Qcur, 0, 2, 1, 3); + + // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) + struct ggml_tensor *K = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_k, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_k) * n_embd), + n_embd / n_head, n_head, n_past + N), + 0, 2, 1, 3); + + // K * Q + struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + struct ggml_tensor *KQ_scaled = ggml_scale_inplace( + ctx0, KQ, ggml_new_f32(ctx0, 1.0f / sqrt(float(n_embd) / n_head))); + + // KQ_masked = mask_past(KQ_scaled) + struct ggml_tensor *KQ_masked = + ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); + + // KQ = soft_max(KQ_masked) + struct ggml_tensor *KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, + // 3).contiguous() + struct ggml_tensor *V = ggml_view_3d( + ctx0, model.memory_v, n_past + N, n_embd / n_head, n_head, + n_ctx * ggml_element_size(model.memory_v), + n_ctx * ggml_element_size(model.memory_v) * n_embd / n_head, + il * n_ctx * ggml_element_size(model.memory_v) * n_embd); + + // KQV = transpose(V) * KQ_soft_max + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + // cur = KQV_merged.contiguous().view(n_embd, N) + cur = ggml_cpy(ctx0, KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_proj_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_proj_b, cur), cur); + } + } + + ggml_set_scratch(ctx0, { + 0, + scr1_size, + scr1, + }); + + if (hparams.par_res == 0) { + struct ggml_tensor *inpFF = ggml_add(ctx0, cur, inpL); + + cur = gpt_neox_ff(model.layers[il], ctx0, inpFF); + + // input for next layer + inpL = ggml_add(ctx0, cur, inpFF); + } else { + struct ggml_tensor *inpFF = cur; + + // this is independent of the self-attention result, so it could be done + // in parallel to the self-attention note here we pass inpL instead of cur + cur = gpt_neox_ff(model.layers[il], ctx0, inpL); + + // layer input + FF + cur = ggml_add(ctx0, cur, inpFF); + + // input for next layer + inpL = ggml_add(ctx0, cur, inpL); + } + } + + ggml_set_scratch(ctx0, { + 0, + scr0_size, + scr0, + }); + + // norm + { + inpL = ggml_norm(ctx0, inpL); + + // inpL = ln_f_g*inpL + ln_f_b + inpL = ggml_add(ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.ln_f_g, inpL), inpL), + ggml_repeat(ctx0, model.ln_f_b, inpL)); + } + + ggml_set_scratch(ctx0, { + 0, + 0, + nullptr, + }); + + // lm_head + { + inpL = ggml_mul_mat(ctx0, model.lmh_g, inpL); + + // inpL = ggml_add(ctx0, + // ggml_repeat(ctx0, model.lmh_b, inpL), + // inpL); + } + + // logits -> probs + // inpL = ggml_soft_max_inplace(ctx0, inpL); + + // run the computation + ggml_build_forward_expand(&gf, inpL); + ggml_graph_compute(ctx0, &gf); + + // if (n_past%100 == 0) { + // ggml_graph_print (&gf); + // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); + //} + + // embd_w.resize(n_vocab*N); + // memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); + + // return result for just the last token + embd_w.resize(n_vocab); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL) + (n_vocab * (N - 1)), + sizeof(float) * n_vocab); + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0) / N; + } + // printf("used_mem = %zu\n", ggml_used_mem(ctx0)); + + ggml_free(ctx0); + + return true; +} + +REGISTER_LLM(gpt_neox); diff --git a/ctransformers/models/llms/gpt2.cc b/ctransformers/models/llms/gpt2.cc new file mode 100644 index 0000000000000000000000000000000000000000..28c043370fa6f44e35becbb077e1f451659a4f75 --- /dev/null +++ b/ctransformers/models/llms/gpt2.cc @@ -0,0 +1,702 @@ +#include "llm.h" + +// https://github.com/ggerganov/ggml/blob/master/examples/gpt-2/main.cpp + +// default hparams (GPT-2 117M) +struct gpt2_hparams { + int32_t n_vocab = 50257; + int32_t n_ctx = 1024; + int32_t n_embd = 768; + int32_t n_head = 12; + int32_t n_layer = 12; + int32_t ftype = 1; +}; + +struct gpt2_layer { + // normalization + struct ggml_tensor *ln_1_g; + struct ggml_tensor *ln_1_b; + + struct ggml_tensor *ln_2_g; + struct ggml_tensor *ln_2_b; + + // attention + struct ggml_tensor *c_attn_attn_w; + struct ggml_tensor *c_attn_attn_b; + + struct ggml_tensor *c_attn_proj_w; + struct ggml_tensor *c_attn_proj_b; + + // mlp + struct ggml_tensor *c_mlp_fc_w; + struct ggml_tensor *c_mlp_fc_b; + + struct ggml_tensor *c_mlp_proj_w; + struct ggml_tensor *c_mlp_proj_b; +}; + +struct gpt2_model { + gpt2_hparams hparams; + + // normalization + struct ggml_tensor *ln_f_g; + struct ggml_tensor *ln_f_b; + + struct ggml_tensor *wte; // position embedding + struct ggml_tensor *wpe; // token embedding + struct ggml_tensor *lm_head; // language model head + + std::vector layers; + + // key + value memory + struct ggml_tensor *memory_k; + struct ggml_tensor *memory_v; + + // + struct ggml_context *ctx; + std::map tensors; +}; + +// load the model's weights from a file +bool gpt2_model_load(const std::string &fname, gpt2_model &model, + gpt_vocab &vocab) { + auto fin = std::ifstream(fname, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != 0x67676d6c) { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, + fname.c_str()); + return false; + } + } + + // load hparams + { + auto &hparams = model.hparams; + + fin.read((char *)&hparams.n_vocab, sizeof(hparams.n_vocab)); + fin.read((char *)&hparams.n_ctx, sizeof(hparams.n_ctx)); + fin.read((char *)&hparams.n_embd, sizeof(hparams.n_embd)); + fin.read((char *)&hparams.n_head, sizeof(hparams.n_head)); + fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer)); + fin.read((char *)&hparams.ftype, sizeof(hparams.ftype)); + + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + + hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + + // load vocab + { + int32_t n_vocab = 0; + fin.read((char *)&n_vocab, sizeof(n_vocab)); + + if (n_vocab != model.hparams.n_vocab) { + fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", + __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); + return false; + } + + std::string word; + std::vector buf(128); + + for (int i = 0; i < n_vocab; i++) { + uint32_t len; + fin.read((char *)&len, sizeof(len)); + + buf.resize(len); + fin.read((char *)buf.data(), len); + word.assign(buf.data(), len); + + vocab.token_to_id[word] = i; + vocab.id_to_token[i] = word; + } + } + + // for the big tensors, we have the option to store the data in 16-bit floats + // or quantized in order to save memory and also to speed up the computation + ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); + if (wtype == GGML_TYPE_COUNT) { + fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", + __func__, fname.c_str(), model.hparams.ftype); + return false; + } + + auto &ctx = model.ctx; + + size_t ctx_size = 0; + + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_vocab = hparams.n_vocab; + + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_g + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_b + + ctx_size += n_vocab * n_embd * ggml_type_sizef(wtype); // wte + ctx_size += n_ctx * n_embd * ggml_type_sizef(GGML_TYPE_F32); // wpe + ctx_size += n_vocab * n_embd * ggml_type_sizef(wtype); // lm_head + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_b + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_b + + ctx_size += n_layer * (3 * n_embd * n_embd * + ggml_type_sizef(wtype)); // c_attn_attn_w + ctx_size += n_layer * + (3 * n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_attn_attn_b + + ctx_size += + n_layer * (n_embd * n_embd * ggml_type_sizef(wtype)); // c_attn_proj_w + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_attn_proj_b + + ctx_size += + n_layer * (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_fc_w + ctx_size += + n_layer * (4 * n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_fc_b + + ctx_size += n_layer * + (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_proj_w + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b + + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F32); // memory_k + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F32); // memory_v + + ctx_size += (6 + 12 * n_layer) * 512; // object overhead + } + + // create the ggml context + { + struct ggml_init_params params = { + /*.mem_size =*/ctx_size, + /*.mem_buffer =*/NULL, + /*.no_alloc =*/false, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + + // prepare memory for the weights + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_vocab = hparams.n_vocab; + + model.layers.resize(n_layer); + + model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx); + model.lm_head = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + + // map by name + model.tensors["model/ln_f/g"] = model.ln_f_g; + model.tensors["model/ln_f/b"] = model.ln_f_b; + + model.tensors["model/wte"] = model.wte; + model.tensors["model/wpe"] = model.wpe; + model.tensors["model/lm_head"] = model.lm_head; + + for (int i = 0; i < n_layer; ++i) { + auto &layer = model.layers[i]; + + layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_attn_attn_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 3 * n_embd); + layer.c_attn_attn_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3 * n_embd); + + layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4 * n_embd); + layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4 * n_embd); + + layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4 * n_embd, n_embd); + layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + // map by name + model.tensors["model/h" + std::to_string(i) + "/ln_1/g"] = layer.ln_1_g; + model.tensors["model/h" + std::to_string(i) + "/ln_1/b"] = layer.ln_1_b; + + model.tensors["model/h" + std::to_string(i) + "/ln_2/g"] = layer.ln_2_g; + model.tensors["model/h" + std::to_string(i) + "/ln_2/b"] = layer.ln_2_b; + + model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/w"] = + layer.c_attn_attn_w; + model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/b"] = + layer.c_attn_attn_b; + + model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/w"] = + layer.c_attn_proj_w; + model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/b"] = + layer.c_attn_proj_b; + + model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/w"] = + layer.c_mlp_fc_w; + model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/b"] = + layer.c_mlp_fc_b; + + model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/w"] = + layer.c_mlp_proj_w; + model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/b"] = + layer.c_mlp_proj_b; + } + } + + // key + value memory + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + + const int n_mem = n_layer * n_ctx; + const int n_elements = n_embd * n_mem; + + model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); + + const size_t memory_size = + ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); + } + + // load weights + { + size_t total_size = 0; + + bool has_lm_head = false; + + while (true) { + int32_t n_dims; + int32_t length; + int32_t ttype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ttype), sizeof(ttype)); + + if (fin.eof()) { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + fin.read(&name[0], length); + + if (model.tensors.find(name.data()) == model.tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, + name.data()); + return false; + } + + auto tensor = model.tensors[name.data()]; + if (ggml_nelements(tensor) != nelements) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", + __func__, name.data()); + return false; + } + + if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { + fprintf(stderr, + "%s: tensor '%s' has wrong shape in model file: got [%d, %d], " + "expected [%d, %d]\n", + __func__, name.data(), (int)tensor->ne[0], (int)tensor->ne[1], + ne[0], ne[1]); + return false; + } + + // for debugging + if (0) { + printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", + name.data(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), + ggml_nbytes(tensor) / 1024.0 / 1024.0, ggml_nbytes(tensor)); + } + + const size_t bpe = ggml_type_size(ggml_type(ttype)); + + if ((nelements * bpe) / ggml_blck_size(tensor->type) != + ggml_nbytes(tensor)) { + fprintf(stderr, + "%s: tensor '%s' has wrong size in model file: got %zu, " + "expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor), nelements * bpe); + return false; + } + + fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + + // GPT-2 models share the WTE tensor as the LM head + if (name == "model/wte" && has_lm_head == false) { + memcpy(model.lm_head->data, tensor->data, ggml_nbytes(tensor)); + } + + if (name == "model/lm_head") { + has_lm_head = true; + } + + total_size += ggml_nbytes(tensor); + } + } + + fin.close(); + + return true; +} + +// evaluate the transformer +// +// - model: the model +// - n_threads: number of threads to use +// - n_past: the context size so far +// - embd_inp: the embeddings of the tokens in the context +// - embd_w: the predicted logits for the next token +// +bool gpt2_eval(const gpt2_model &model, const int n_threads, const int n_past, + const std::vector &embd_inp, + std::vector &embd_w, size_t &mem_per_token) { + const int N = embd_inp.size(); + + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_head = hparams.n_head; + const int n_vocab = hparams.n_vocab; + + static size_t buf_size = 256u * 1024 * 1024; + static void *buf = malloc(buf_size); + + if (mem_per_token > 0 && mem_per_token * N > buf_size) { + const size_t buf_size_new = + 1.1 * + (mem_per_token * N); // add 10% to account for ggml object overhead + // printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, + // buf_size, buf_size_new); + + // reallocate + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } + } + + struct ggml_init_params params = { + /*.mem_size =*/buf_size, + /*.mem_buffer =*/buf, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + struct ggml_cgraph gf = {}; + gf.n_threads = n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd)); + + struct ggml_tensor *position = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + for (int i = 0; i < N; ++i) { + ((int32_t *)position->data)[i] = n_past + i; + } + + // wte + wpe + struct ggml_tensor *inpL = + ggml_add(ctx0, ggml_get_rows(ctx0, model.wte, embd), + ggml_get_rows(ctx0, model.wpe, position)); + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor *cur; + + // norm + { + // [ 768, N] + cur = ggml_norm(ctx0, inpL); + + // cur = ln_1_g*cur + ln_1_b + // [ 768, N] + cur = ggml_add( + ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_1_g, cur), cur), + ggml_repeat(ctx0, model.layers[il].ln_1_b, cur)); + } + + // attn + // [2304, 768] - model.layers[il].c_attn_attn_w + // [2304, 1] - model.layers[il].c_attn_attn_b + // [ 768, N] - cur (in) + // [2304, N] - cur (out) + // + // cur = attn_w*cur + attn_b + // [2304, N] + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_attn_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_attn_b, cur), cur); + } + + // self-attention + { + struct ggml_tensor *Qcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 0 * sizeof(float) * n_embd); + struct ggml_tensor *Kcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 1 * sizeof(float) * n_embd); + struct ggml_tensor *Vcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 2 * sizeof(float) * n_embd); + + // store key and value to memory + if (N >= 1) { + struct ggml_tensor *k = + ggml_view_1d(ctx0, model.memory_k, N * n_embd, + (ggml_element_size(model.memory_k) * n_embd) * + (il * n_ctx + n_past)); + struct ggml_tensor *v = + ggml_view_1d(ctx0, model.memory_v, N * n_embd, + (ggml_element_size(model.memory_v) * n_embd) * + (il * n_ctx + n_past)); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, + // 3) [64, N, 12] + struct ggml_tensor *Q = + ggml_permute(ctx0, + ggml_cpy(ctx0, Qcur, + ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, + n_embd / n_head, n_head, N)), + 0, 2, 1, 3); + + // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) + // [64, n_past + N, 12] + struct ggml_tensor *K = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_k, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_k) * n_embd), + n_embd / n_head, n_head, n_past + N), + 0, 2, 1, 3); + + // GG: flash attention + // struct ggml_tensor * V = + // ggml_cpy(ctx0, + // ggml_permute(ctx0, + // ggml_reshape_3d(ctx0, + // ggml_view_1d(ctx0, model.memory_v, (n_past + + // N)*n_embd, + // il*n_ctx*ggml_element_size(model.memory_v)*n_embd), + // n_embd/n_head, n_head, n_past + N), + // 1, 2, 0, 3), + // ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_past + N, + // n_embd/n_head, n_head)); + + // struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, true); + + // K * Q + // [n_past + N, N, 12] + struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + // [n_past + N, N, 12] + struct ggml_tensor *KQ_scaled = ggml_scale_inplace( + ctx0, KQ, ggml_new_f32(ctx0, 1.0f / sqrt(float(n_embd) / n_head))); + + // KQ_masked = mask_past(KQ_scaled) + // [n_past + N, N, 12] + struct ggml_tensor *KQ_masked = + ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); + + // KQ = soft_max(KQ_masked) + // [n_past + N, N, 12] + struct ggml_tensor *KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, + // 3).contiguous() [n_past + N, 64, 12] + struct ggml_tensor *V_trans = ggml_cpy( + ctx0, + ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_v, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_v) * n_embd), + n_embd / n_head, n_head, n_past + N), + 1, 2, 0, 3), + ggml_new_tensor_3d(ctx0, model.memory_v->type, n_past + N, + n_embd / n_head, n_head)); + + // KQV = transpose(V) * KQ_soft_max + // [64, N, 12] + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + // [64, 12, N] + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + // cur = KQV_merged.contiguous().view(n_embd, N) + // [768, N] + cur = ggml_cpy(ctx0, KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + } + + // projection + // [ 768, 768] - model.layers[il].c_attn_proj_w + // [ 768, 1] - model.layers[il].c_attn_proj_b + // [ 768, N] - cur (in) + // [ 768, N] - cur (out) + // + // cur = proj_w*cur + proj_b + // [768, N] + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_proj_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_proj_b, cur), cur); + } + + // add the input + cur = ggml_add(ctx0, cur, inpL); + + struct ggml_tensor *inpFF = cur; + + // feed-forward network + { + // norm + { + cur = ggml_norm(ctx0, inpFF); + + // cur = ln_2_g*cur + ln_2_b + // [ 768, N] + cur = ggml_add( + ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_2_g, cur), + cur), + ggml_repeat(ctx0, model.layers[il].ln_2_b, cur)); + } + + // fully connected + // [3072, 768] - model.layers[il].c_mlp_fc_w + // [3072, 1] - model.layers[il].c_mlp_fc_b + // [ 768, N] - cur (in) + // [3072, N] - cur (out) + // + // cur = fc_w*cur + fc_b + // [3072, N] + cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_fc_w, cur); + + cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur), + cur); + + // GELU activation + // [3072, N] + cur = ggml_gelu(ctx0, cur); + + // projection + // [ 768, 3072] - model.layers[il].c_mlp_proj_w + // [ 768, 1] - model.layers[il].c_mlp_proj_b + // [3072, N] - cur (in) + // [ 768, N] - cur (out) + // + // cur = proj_w*cur + proj_b + // [768, N] + cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_proj_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur), cur); + } + + // input for next layer + inpL = ggml_add(ctx0, cur, inpFF); + } + + // norm + { + // [ 768, N] + inpL = ggml_norm(ctx0, inpL); + + // inpL = ln_f_g*inpL + ln_f_b + // [ 768, N] + inpL = ggml_add(ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.ln_f_g, inpL), inpL), + ggml_repeat(ctx0, model.ln_f_b, inpL)); + } + + // inpL = WTE * inpL + // [ 768, 50257] - model.lm_head + // [ 768, N] - inpL + inpL = ggml_mul_mat(ctx0, model.lm_head, inpL); + + // logits -> probs + // inpL = ggml_soft_max_inplace(ctx0, inpL); + + // run the computation + ggml_build_forward_expand(&gf, inpL); + ggml_graph_compute(ctx0, &gf); + + // if (n_past%100 == 0) { + // ggml_graph_print (&gf); + // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); + //} + + // embd_w.resize(n_vocab*N); + // memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); + + // return result just for the last token + embd_w.resize(n_vocab); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL) + (n_vocab * (N - 1)), + sizeof(float) * n_vocab); + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0) / N; + } + // printf("used_mem = %zu\n", ggml_used_mem(ctx0)); + + ggml_free(ctx0); + + return true; +} + +REGISTER_LLM(gpt2); diff --git a/ctransformers/models/llms/gptj.cc b/ctransformers/models/llms/gptj.cc new file mode 100644 index 0000000000000000000000000000000000000000..09c51ba9a9366501bd7bbd61430d5c680f15bfdb --- /dev/null +++ b/ctransformers/models/llms/gptj.cc @@ -0,0 +1,609 @@ +#include "llm.h" + +// https://github.com/ggerganov/ggml/blob/master/examples/gpt-j/main.cpp + +// default hparams (GPT-J 6B) +struct gptj_hparams { + int32_t n_vocab = 50400; + int32_t n_ctx = 2048; + int32_t n_embd = 4096; + int32_t n_head = 16; + int32_t n_layer = 28; + int32_t n_rot = 64; + int32_t ftype = 1; +}; + +struct gptj_layer { + // normalization + struct ggml_tensor *ln_1_g; + struct ggml_tensor *ln_1_b; + + // attention + struct ggml_tensor *c_attn_q_proj_w; + struct ggml_tensor *c_attn_k_proj_w; + struct ggml_tensor *c_attn_v_proj_w; + + struct ggml_tensor *c_attn_proj_w; + + // ff + struct ggml_tensor *c_mlp_fc_w; + struct ggml_tensor *c_mlp_fc_b; + + struct ggml_tensor *c_mlp_proj_w; + struct ggml_tensor *c_mlp_proj_b; +}; + +struct gptj_model { + gptj_hparams hparams; + + // normalization + struct ggml_tensor *ln_f_g; + struct ggml_tensor *ln_f_b; + + struct ggml_tensor *wte; // position embedding + + struct ggml_tensor *lmh_g; // language model head + struct ggml_tensor *lmh_b; // language model bias + + std::vector layers; + + // key + value memory + struct ggml_tensor *memory_k; + struct ggml_tensor *memory_v; + + // + struct ggml_context *ctx; + std::map tensors; +}; + +// load the model's weights from a file +bool gptj_model_load(const std::string &fname, gptj_model &model, + gpt_vocab &vocab) { + auto fin = std::ifstream(fname, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != 0x67676d6c) { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, + fname.c_str()); + return false; + } + } + + // load hparams + { + auto &hparams = model.hparams; + + fin.read((char *)&hparams.n_vocab, sizeof(hparams.n_vocab)); + fin.read((char *)&hparams.n_ctx, sizeof(hparams.n_ctx)); + fin.read((char *)&hparams.n_embd, sizeof(hparams.n_embd)); + fin.read((char *)&hparams.n_head, sizeof(hparams.n_head)); + fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer)); + fin.read((char *)&hparams.n_rot, sizeof(hparams.n_rot)); + fin.read((char *)&hparams.ftype, sizeof(hparams.ftype)); + + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + + hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + + // load vocab + { + int32_t n_vocab = 0; + fin.read((char *)&n_vocab, sizeof(n_vocab)); + + if (n_vocab != model.hparams.n_vocab) { + fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", + __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); + return false; + } + + std::string word; + std::vector buf(128); + + for (int i = 0; i < n_vocab; i++) { + uint32_t len; + fin.read((char *)&len, sizeof(len)); + + buf.resize(len); + fin.read((char *)buf.data(), len); + word.assign(buf.data(), len); + + vocab.token_to_id[word] = i; + vocab.id_to_token[i] = word; + } + } + + // for the big tensors, we have the option to store the data in 16-bit floats + // or quantized in order to save memory and also to speed up the computation + ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); + if (wtype == GGML_TYPE_COUNT) { + fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", + __func__, fname.c_str(), model.hparams.ftype); + return false; + } + + auto &ctx = model.ctx; + + size_t ctx_size = 0; + + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_vocab = hparams.n_vocab; + + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_g + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_b + + ctx_size += n_embd * n_vocab * ggml_type_sizef(wtype); // wte + + ctx_size += n_embd * n_vocab * ggml_type_sizef(wtype); // lmh_g + ctx_size += n_vocab * ggml_type_sizef(GGML_TYPE_F32); // lmh_b + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_b + + ctx_size += n_layer * + (n_embd * n_embd * ggml_type_sizef(wtype)); // c_attn_q_proj_w + ctx_size += n_layer * + (n_embd * n_embd * ggml_type_sizef(wtype)); // c_attn_k_proj_w + ctx_size += n_layer * + (n_embd * n_embd * ggml_type_sizef(wtype)); // c_attn_v_proj_w + + ctx_size += + n_layer * (n_embd * n_embd * ggml_type_sizef(wtype)); // c_attn_proj_w + + ctx_size += + n_layer * (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_fc_w + ctx_size += + n_layer * (4 * n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_fc_b + + ctx_size += n_layer * + (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_proj_w + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b + + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F16); // memory_k + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F16); // memory_v + + ctx_size += (5 + 10 * n_layer) * 512; // object overhead + } + + // create the ggml context + { + struct ggml_init_params params = { + /*.mem_size =*/ctx_size, + /*.mem_buffer =*/NULL, + /*.no_alloc =*/false, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + + // prepare memory for the weights + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_vocab = hparams.n_vocab; + + model.layers.resize(n_layer); + + model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + + model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + model.lmh_g = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + model.lmh_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_vocab); + + // map by name + model.tensors["transformer.wte.weight"] = model.wte; + + model.tensors["transformer.ln_f.weight"] = model.ln_f_g; + model.tensors["transformer.ln_f.bias"] = model.ln_f_b; + + model.tensors["lm_head.weight"] = model.lmh_g; + model.tensors["lm_head.bias"] = model.lmh_b; + + for (int i = 0; i < n_layer; ++i) { + auto &layer = model.layers[i]; + + layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_attn_q_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.c_attn_k_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.c_attn_v_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + + layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + + layer.c_mlp_fc_w = ggml_new_tensor_2d(ctx, wtype, n_embd, 4 * n_embd); + layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4 * n_embd); + + layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4 * n_embd, n_embd); + layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + // map by name + model.tensors["transformer.h." + std::to_string(i) + ".ln_1.weight"] = + layer.ln_1_g; + model.tensors["transformer.h." + std::to_string(i) + ".ln_1.bias"] = + layer.ln_1_b; + + model.tensors["transformer.h." + std::to_string(i) + + ".attn.q_proj.weight"] = layer.c_attn_q_proj_w; + model.tensors["transformer.h." + std::to_string(i) + + ".attn.k_proj.weight"] = layer.c_attn_k_proj_w; + model.tensors["transformer.h." + std::to_string(i) + + ".attn.v_proj.weight"] = layer.c_attn_v_proj_w; + + model.tensors["transformer.h." + std::to_string(i) + + ".attn.out_proj.weight"] = layer.c_attn_proj_w; + + model + .tensors["transformer.h." + std::to_string(i) + ".mlp.fc_in.weight"] = + layer.c_mlp_fc_w; + model.tensors["transformer.h." + std::to_string(i) + ".mlp.fc_in.bias"] = + layer.c_mlp_fc_b; + + model.tensors["transformer.h." + std::to_string(i) + + ".mlp.fc_out.weight"] = layer.c_mlp_proj_w; + model.tensors["transformer.h." + std::to_string(i) + ".mlp.fc_out.bias"] = + layer.c_mlp_proj_b; + } + } + + // key + value memory + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + + const int n_mem = n_layer * n_ctx; + const int n_elements = n_embd * n_mem; + + model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + + const size_t memory_size = + ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); + } + + // load weights + { + int n_tensors = 0; + size_t total_size = 0; + + while (true) { + int32_t n_dims; + int32_t length; + int32_t ttype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ttype), sizeof(ttype)); + + if (fin.eof()) { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + fin.read(&name[0], length); + + if (model.tensors.find(name.data()) == model.tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, + name.data()); + return false; + } + + auto tensor = model.tensors[name.data()]; + if (ggml_nelements(tensor) != nelements) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", + __func__, name.data()); + return false; + } + + if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { + fprintf(stderr, + "%s: tensor '%s' has wrong shape in model file: got [%d, %d], " + "expected [%d, %d]\n", + __func__, name.data(), (int)tensor->ne[0], (int)tensor->ne[1], + ne[0], ne[1]); + return false; + } + + // for debugging + if (0) { + printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", + name.data(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), + ggml_nbytes(tensor) / 1024.0 / 1024.0, ggml_nbytes(tensor)); + } + + const size_t bpe = ggml_type_size(ggml_type(ttype)); + + if ((nelements * bpe) / ggml_blck_size(tensor->type) != + ggml_nbytes(tensor)) { + fprintf(stderr, + "%s: tensor '%s' has wrong size in model file: got %zu, " + "expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor), nelements * bpe); + return false; + } + + fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + + // printf("%42s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], + // ne[1], ttype == 0 ? "float" : "f16", + // ggml_nbytes(tensor)/1024.0/1024.0); + total_size += ggml_nbytes(tensor); + } + } + + fin.close(); + + return true; +} + +// evaluate the transformer +// +// - model: the model +// - n_threads: number of threads to use +// - n_past: the context size so far +// - embd_inp: the embeddings of the tokens in the context +// - embd_w: the predicted logits for the next token +// +// The GPT-J model requires about 16MB of memory per input token. +// +bool gptj_eval(const gptj_model &model, const int n_threads, const int n_past, + const std::vector &embd_inp, + std::vector &embd_w, size_t &mem_per_token) { + const int N = embd_inp.size(); + + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_head = hparams.n_head; + const int n_vocab = hparams.n_vocab; + const int n_rot = hparams.n_rot; + + static size_t buf_size = 256u * 1024 * 1024; + static void *buf = malloc(buf_size); + + if (mem_per_token > 0 && mem_per_token * N > buf_size) { + const size_t buf_size_new = + 1.1 * + (mem_per_token * N); // add 10% to account for ggml object overhead + // printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, + // buf_size, buf_size_new); + + // reallocate + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } + } + + struct ggml_init_params params = { + /*.mem_size =*/buf_size, + /*.mem_buffer =*/buf, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + struct ggml_cgraph gf = {}; + gf.n_threads = n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd)); + + // wte + struct ggml_tensor *inpL = ggml_get_rows(ctx0, model.wte, embd); + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor *cur; + + // norm + { + cur = ggml_norm(ctx0, inpL); + + // cur = ln_1_g*cur + ln_1_b + cur = ggml_add( + ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_1_g, cur), cur), + ggml_repeat(ctx0, model.layers[il].ln_1_b, cur)); + } + + struct ggml_tensor *inpSA = cur; + + // self-attention + { + struct ggml_tensor *Qcur = ggml_rope_inplace( + ctx0, + ggml_reshape_3d( + ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), + n_embd / n_head, n_head, N), + n_past, n_rot, 0); + struct ggml_tensor *Kcur = ggml_rope_inplace( + ctx0, + ggml_reshape_3d( + ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), + n_embd / n_head, n_head, N), + n_past, n_rot, 0); + + // store key and value to memory + { + struct ggml_tensor *Vcur = ggml_transpose( + ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_v_proj_w, cur)); + + struct ggml_tensor *k = + ggml_view_1d(ctx0, model.memory_k, N * n_embd, + (ggml_element_size(model.memory_k) * n_embd) * + (il * n_ctx + n_past)); + struct ggml_tensor *v = ggml_view_2d( + ctx0, model.memory_v, N, n_embd, + (n_ctx)*ggml_element_size(model.memory_v), + (il * n_ctx) * ggml_element_size(model.memory_v) * n_embd + + n_past * ggml_element_size(model.memory_v)); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, + // 3) + struct ggml_tensor *Q = ggml_permute(ctx0, Qcur, 0, 2, 1, 3); + + // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) + struct ggml_tensor *K = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_k, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_k) * n_embd), + n_embd / n_head, n_head, n_past + N), + 0, 2, 1, 3); + + // K * Q + struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + struct ggml_tensor *KQ_scaled = ggml_scale_inplace( + ctx0, KQ, ggml_new_f32(ctx0, 1.0f / sqrt(float(n_embd) / n_head))); + + // KQ_masked = mask_past(KQ_scaled) + struct ggml_tensor *KQ_masked = + ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); + + // KQ = soft_max(KQ_masked) + struct ggml_tensor *KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, + // 3).contiguous() + struct ggml_tensor *V = ggml_view_3d( + ctx0, model.memory_v, n_past + N, n_embd / n_head, n_head, + n_ctx * ggml_element_size(model.memory_v), + n_ctx * ggml_element_size(model.memory_v) * n_embd / n_head, + il * n_ctx * ggml_element_size(model.memory_v) * n_embd); + + // KQV = transpose(V) * KQ_soft_max + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + // cur = KQV_merged.contiguous().view(n_embd, N) + cur = ggml_cpy(ctx0, KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection (no bias) + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_proj_w, cur); + } + + struct ggml_tensor *inpFF = cur; + + // feed-forward network + // this is independent of the self-attention result, so it could be done in + // parallel to the self-attention + { + // note here we pass inpSA instead of cur + cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_fc_w, inpSA); + + cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur), + cur); + + // GELU activation + cur = ggml_gelu(ctx0, cur); + + // projection + // cur = proj_w*cur + proj_b + cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_proj_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur), cur); + } + + // self-attention + FF + cur = ggml_add(ctx0, cur, inpFF); + + // input for next layer + inpL = ggml_add(ctx0, cur, inpL); + } + + // norm + { + inpL = ggml_norm(ctx0, inpL); + + // inpL = ln_f_g*inpL + ln_f_b + inpL = ggml_add(ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.ln_f_g, inpL), inpL), + ggml_repeat(ctx0, model.ln_f_b, inpL)); + } + + // lm_head + { + inpL = ggml_mul_mat(ctx0, model.lmh_g, inpL); + + inpL = ggml_add(ctx0, ggml_repeat(ctx0, model.lmh_b, inpL), inpL); + } + + // logits -> probs + // inpL = ggml_soft_max_inplace(ctx0, inpL); + + // run the computation + ggml_build_forward_expand(&gf, inpL); + ggml_graph_compute(ctx0, &gf); + + // if (n_past%100 == 0) { + // ggml_graph_print (&gf); + // ggml_graph_dump_dot(&gf, NULL, "gpt-j.dot"); + //} + + // embd_w.resize(n_vocab*N); + // memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); + + // return result for just the last token + embd_w.resize(n_vocab); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL) + (n_vocab * (N - 1)), + sizeof(float) * n_vocab); + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0) / N; + } + // printf("used_mem = %zu\n", ggml_used_mem(ctx0)); + + ggml_free(ctx0); + + return true; +} + +REGISTER_LLM(gptj); diff --git a/ctransformers/models/llms/llama.cc b/ctransformers/models/llms/llama.cc new file mode 100644 index 0000000000000000000000000000000000000000..6060a76fa085af4f96455aada152b651471d7e68 --- /dev/null +++ b/ctransformers/models/llms/llama.cc @@ -0,0 +1,111 @@ +#include "llm.h" + +// https://github.com/ggerganov/llama.cpp/blob/master/examples/main/main.cpp + +#include "ggml/llama.cpp" + +class llama_llm : public LLM { + public: + virtual ~llama_llm() { + if (ctx_ != nullptr) { + llama_free(ctx_); + } + } + + std::vector Tokenize(const std::string &text) const override { + return llama_tokenize(ctx_->vocab, text, /*bos=*/true); + } + + const std::string &Detokenize(const gpt_vocab::id id) const override { + if (id >= llama_n_vocab(ctx_)) { + return kEmptyString; + } + return ctx_->vocab.id_to_token[id].tok; + } + + bool IsEosToken(const gpt_vocab::id token) const override { + return token == EosToken(); + } + + gpt_vocab::id EosToken() const override { return llama_token_eos(); } + + int VocabSize() const override { return llama_n_vocab(ctx_); } + + std::vector &Logits() override { return ctx_->logits; } + + const std::vector &Embeddings() const override { + return ctx_->embedding; + } + + gpt_vocab::id Sample(const int top_k, const float top_p, + const float temperature, const float repetition_penalty, + int last_n_tokens, int seed) const override { + if (last_n_tokens < 0) { + last_n_tokens = ContextLength(); + } + if (seed < 0) { + seed = time(nullptr); + } + ctx_->rng.seed(seed); + + const float *logits = llama_get_logits(ctx_); + const int n_vocab = llama_n_vocab(ctx_); + + std::vector candidates; + candidates.reserve(n_vocab); + for (llama_token token_id = 0; token_id < n_vocab; token_id++) { + candidates.emplace_back( + llama_token_data{token_id, logits[token_id], 0.0f}); + } + + llama_token_data_array candidates_p = { + candidates.data(), + candidates.size(), + false, + }; + + { + std::unordered_set recent_tokens_set; + if (repetition_penalty != 1.0f) { + recent_tokens_set = previous_tokens_.GetRecent(last_n_tokens); + } + std::vector recent_tokens(recent_tokens_set.begin(), + recent_tokens_set.end()); + llama_sample_repetition_penalty(ctx_, &candidates_p, recent_tokens.data(), + recent_tokens.size(), repetition_penalty); + } + + llama_sample_top_k(ctx_, &candidates_p, top_k, 1); + llama_sample_top_p(ctx_, &candidates_p, top_p, 1); + llama_sample_temperature(ctx_, &candidates_p, temperature); + return llama_sample_token(ctx_, &candidates_p); + } + + protected: + bool Load(const std::string &filename, const int context_length, + const int gpu_layers) override { + llama_context_params params = llama_context_default_params(); + params.embedding = true; + if (context_length > 0) { + params.n_ctx = context_length; + } + params.n_gpu_layers = gpu_layers; + + ctx_ = llama_init_from_file(filename.c_str(), params); + if (ctx_ == nullptr) { + return false; + } + n_ctx_ = llama_n_ctx(ctx_); + return true; + } + + bool Eval(const std::vector &tokens, const int threads, + const int n_past) override { + const int status = + llama_eval(ctx_, tokens.data(), tokens.size(), n_past, threads); + return status == 0; + } + + private: + llama_context *ctx_ = nullptr; +}; diff --git a/ctransformers/models/llms/mpt.cc b/ctransformers/models/llms/mpt.cc new file mode 100644 index 0000000000000000000000000000000000000000..96cb71676e548222600a7007ca51eda41be461f2 --- /dev/null +++ b/ctransformers/models/llms/mpt.cc @@ -0,0 +1,595 @@ +#include "llm.h" + +// https://github.com/ggerganov/ggml/blob/master/examples/mpt/main.cpp + +// no defaults for now +struct mpt_hparams { + int32_t d_model = 0; + int32_t max_seq_len = 0; + int32_t n_heads = 0; + int32_t n_layers = 0; + int32_t n_vocab = 0; + float alibi_bias_max = 0; + float clip_qkv = 0; + int32_t ftype = 0; + int32_t n_ctx = 2048; +}; + +struct mpt_layer { + // pre normalization + struct ggml_tensor *norm_1_weight; + + // attention + struct ggml_tensor *c_attn_wqkv_weight; + struct ggml_tensor *c_attn_out_proj_weight; + + // post normalization + struct ggml_tensor *norm_2_weight; + + // ff + struct ggml_tensor *ffn_up_proj; + struct ggml_tensor *ffn_down_proj; +}; + +struct mpt_model { + mpt_hparams hparams; + + struct ggml_tensor *wte_weight; // position embedding + struct ggml_tensor *norm_f_weight; // language model head + + std::vector layers; + + // key + value memory + struct ggml_tensor *memory_k; + struct ggml_tensor *memory_v; + + struct ggml_context *ctx; + std::map tensors; +}; + +// load the model's weights from a file +bool mpt_model_load(const std::string &fname, mpt_model &model, + gpt_vocab &vocab) { + auto fin = std::ifstream(fname, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != 0x67676d6c) { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, + fname.c_str()); + return false; + } + } + + // load hparams + { + auto &hparams = model.hparams; + + fin.read((char *)&hparams.d_model, sizeof(hparams.d_model)); + fin.read((char *)&hparams.max_seq_len, sizeof(hparams.max_seq_len)); + fin.read((char *)&hparams.n_heads, sizeof(hparams.n_heads)); + fin.read((char *)&hparams.n_layers, sizeof(hparams.n_layers)); + fin.read((char *)&hparams.n_vocab, sizeof(hparams.n_vocab)); + fin.read((char *)&hparams.alibi_bias_max, sizeof(hparams.alibi_bias_max)); + fin.read((char *)&hparams.clip_qkv, sizeof(hparams.clip_qkv)); + fin.read((char *)&hparams.ftype, sizeof(hparams.ftype)); + + hparams.n_ctx = std::min(hparams.max_seq_len, hparams.n_ctx); + + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + + hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + + // load vocab + { + const int32_t n_vocab = model.hparams.n_vocab; + + std::string word; + std::vector buf(128); + + for (int i = 0; i < n_vocab; i++) { + uint32_t len; + fin.read((char *)&len, sizeof(len)); + + buf.resize(len); + fin.read((char *)buf.data(), len); + word.assign(buf.data(), len); + + // Convert token from utf-8 + std::wstring word_multibytes = convert_to_wstring(word); + word.resize(word_multibytes.size()); + for (int w = 0; w < (int)word_multibytes.size(); w++) { + word[w] = uint8_t(word_multibytes[w]); + } + + vocab.token_to_id[word] = i; + vocab.id_to_token[i] = word; + } + } + + // for the big tensors, we have the option to store the data in 16-bit + // floats or quantized in order to save memory and also to speed up the + // computation + ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); + if (wtype == GGML_TYPE_COUNT) { + fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", + __func__, fname.c_str(), model.hparams.ftype); + return false; + } + + auto &ctx = model.ctx; + + size_t ctx_size = 0; + + const auto &hparams = model.hparams; + const size_t n_ctx = hparams.n_ctx; + + { + const size_t n_embd = hparams.d_model; + const size_t n_layer = hparams.n_layers; + const size_t n_vocab = hparams.n_vocab; + + ctx_size += n_embd * n_vocab * ggml_type_sizef(wtype); // wte_weight + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // norm_f_weight + + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_weight + ctx_size += n_layer * (3 * n_embd * n_embd * + ggml_type_sizef(wtype)); // attn_Wqkv_weight + ctx_size += n_layer * (n_embd * n_embd * + ggml_type_sizef(wtype)); // attn_out_proj_weight + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_weight + ctx_size += n_layer * (4 * n_embd * n_embd * + ggml_type_sizef(wtype)); // mlp_mlp_up_weight + ctx_size += n_layer * (n_embd * n_embd * 4 * + ggml_type_sizef(wtype)); // mlp_mlp_down_weight + + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F16); // memory_k + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F16); // memory_v + + ctx_size += (1 + 6 * n_layer) * 512; // object overhead + } + + // create the ggml context + { + struct ggml_init_params params = { + /*.mem_size =*/ctx_size, + /*.mem_buffer =*/NULL, + /*.no_alloc =*/false, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + + // prepare memory for the weights + { + const auto &hparams = model.hparams; + + const size_t n_embd = hparams.d_model; + const size_t n_layer = hparams.n_layers; + const size_t n_vocab = hparams.n_vocab; + + model.layers.resize(n_layer); + + model.wte_weight = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + model.norm_f_weight = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + // map by name + model.tensors["transformer.wte.weight"] = model.wte_weight; + model.tensors["transformer.norm_f.weight"] = model.norm_f_weight; + + for (int i = 0; i < (int)n_layer; ++i) { + auto &layer = model.layers[i]; + + layer.norm_1_weight = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.c_attn_wqkv_weight = + ggml_new_tensor_2d(ctx, wtype, n_embd, 3 * n_embd); + layer.c_attn_out_proj_weight = + ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.norm_2_weight = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ffn_up_proj = ggml_new_tensor_2d(ctx, wtype, n_embd, 4 * n_embd); + layer.ffn_down_proj = ggml_new_tensor_2d(ctx, wtype, 4 * n_embd, n_embd); + + // map by name + model.tensors["transformer.blocks." + std::to_string(i) + + ".norm_1.weight"] = layer.norm_1_weight; + model.tensors["transformer.blocks." + std::to_string(i) + + ".attn.Wqkv.weight"] = layer.c_attn_wqkv_weight; + model.tensors["transformer.blocks." + std::to_string(i) + + ".attn.out_proj.weight"] = layer.c_attn_out_proj_weight; + model.tensors["transformer.blocks." + std::to_string(i) + + ".norm_2.weight"] = layer.norm_2_weight; + model.tensors["transformer.blocks." + std::to_string(i) + + ".ffn.up_proj.weight"] = layer.ffn_up_proj; + model.tensors["transformer.blocks." + std::to_string(i) + + ".ffn.down_proj.weight"] = layer.ffn_down_proj; + } + } + + // key + value memory + { + const auto &hparams = model.hparams; + + const size_t n_embd = hparams.d_model; + const size_t n_layer = hparams.n_layers; + + const int64_t n_mem = n_layer * n_ctx; + const int64_t n_elements = n_embd * n_mem; + + model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + + const size_t memory_size = + ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); + } + + // load weights + { + int n_tensors = 0; + size_t total_size = 0; + + while (true) { + int32_t n_dims; + int32_t length; + int32_t ttype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ttype), sizeof(ttype)); + + if (fin.eof()) { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + fin.read(&name[0], length); + + if (model.tensors.find(name.data()) == model.tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, + name.data()); + return false; + } + + auto tensor = model.tensors[name.data()]; + if (ggml_nelements(tensor) != nelements) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", + __func__, name.data()); + return false; + } + + if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { + fprintf(stderr, + "%s: tensor '%s' has wrong shape in model file: got [%5d, " + "%5d], expected [%5d, %5d]\n", + __func__, name.data(), (int)tensor->ne[0], (int)tensor->ne[1], + ne[0], ne[1]); + return false; + } + + // for debugging + if (0) { + printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", + name.data(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), + ggml_nbytes(tensor) / 1024.0 / 1024.0, ggml_nbytes(tensor)); + } + + const size_t bpe = ggml_type_size(ggml_type(ttype)); + + if ((nelements * bpe) / ggml_blck_size(tensor->type) != + ggml_nbytes(tensor)) { + fprintf(stderr, + "%s: tensor '%s' has wrong size in model file: got %zu, " + "expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor), nelements * bpe); + return false; + } + + fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + + total_size += ggml_nbytes(tensor); + } + } + + fin.close(); + + return true; +} + +// evaluate the transformer +// +// - model: the model +// - n_threads: number of threads to use +// - n_past: the context size so far +// - embd_inp: the embeddings of the tokens in the context +// - embd_w: the predicted logits for the next token +// +bool mpt_eval(const mpt_model &model, const int n_threads, const int n_past, + const std::vector &embd_inp, + std::vector &embd_w, size_t &mem_per_token) { + const bool logits_all = false; + const int N = embd_inp.size(); + + const auto &hparams = model.hparams; + + const int n_embd = hparams.d_model; + const int n_layer = hparams.n_layers; + const int n_head = hparams.n_heads; + const int n_vocab = hparams.n_vocab; + const int n_ctx = hparams.n_ctx; + + static size_t buf_size = 256u * 1024 * 1024; + static void *buf = malloc(buf_size); + + // use 2 scratch buffers + // TODO: very hacky solution - reimplement in a more elegant way + static size_t scr0_size = 256u * 1024 * 1024; + static void *scr0 = malloc(scr0_size); + + static size_t scr1_size = 256u * 1024 * 1024; + static void *scr1 = malloc(scr1_size); + + if (mem_per_token > 0 && mem_per_token * N > buf_size) { + const size_t buf_size_new = + 1.1 * + (mem_per_token * N); // add 10% to account for ggml object overhead + // printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, + // buf_size, buf_size_new); + + // reallocate + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } + } + + struct ggml_init_params params = { + /*.mem_size =*/buf_size, + /*.mem_buffer =*/buf, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + struct ggml_cgraph gf = {}; + gf.n_threads = n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd)); + + struct ggml_tensor *inpL = ggml_get_rows(ctx0, model.wte_weight, embd); + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor *cur; + + ggml_set_scratch(ctx0, { + 0, + scr0_size, + scr0, + }); + + // a = self.ln_1(x) + { + cur = ggml_norm(ctx0, inpL); + + cur = ggml_mul( + ctx0, ggml_repeat(ctx0, model.layers[il].norm_1_weight, cur), cur); + } + + // self-attention + // b, _, past_key_value = self.attn(a, past_key_value=past_key_value, + // attn_bias=attn_bias, attention_mask=attention_mask, + // is_causal=is_causal) + { + // compute QKV + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_wqkv_weight, cur); + + if (model.hparams.clip_qkv > 0.0f) { + cur = ggml_clamp(ctx0, cur, -model.hparams.clip_qkv, + model.hparams.clip_qkv); + } + + struct ggml_tensor *Qcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 0 * sizeof(float) * n_embd); + struct ggml_tensor *Kcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 1 * sizeof(float) * n_embd); + struct ggml_tensor *Vcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 2 * sizeof(float) * n_embd); + + // store key and value to memory + { + struct ggml_tensor *k = + ggml_view_1d(ctx0, model.memory_k, N * n_embd, + (ggml_element_size(model.memory_k) * n_embd) * + (il * n_ctx + n_past)); + struct ggml_tensor *v = + ggml_view_1d(ctx0, model.memory_v, N * n_embd, + (ggml_element_size(model.memory_v) * n_embd) * + (il * n_ctx + n_past)); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, + // 2, 1, 3) [64, N, 12] + struct ggml_tensor *Q = + ggml_permute(ctx0, + ggml_cpy(ctx0, Qcur, + ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, + n_embd / n_head, n_head, N)), + 0, 2, 1, 3); + + // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, + // 3) [64, n_past + N, 12] + struct ggml_tensor *K = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_k, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_k) * n_embd), + n_embd / n_head, n_head, n_past + N), + 0, 2, 1, 3); + // K * Q + struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + struct ggml_tensor *KQ_scaled = ggml_scale( + ctx0, KQ, ggml_new_f32(ctx0, 1.0f / sqrt(float(n_embd) / n_head))); + + struct ggml_tensor *KQ_scaled_alibi = ggml_alibi( + ctx0, KQ_scaled, n_past, n_head, model.hparams.alibi_bias_max); + + // KQ_masked = mask_past(KQ_scaled) + struct ggml_tensor *KQ_masked = + ggml_diag_mask_inf(ctx0, KQ_scaled_alibi, n_past); + + // KQ = soft_max(KQ_masked) + struct ggml_tensor *KQ_soft_max = ggml_soft_max(ctx0, KQ_masked); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, + // 2, 0, 3).contiguous() [n_past + N, 64, 12] + struct ggml_tensor *V_trans = ggml_cpy( + ctx0, + ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_v, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_v) * n_embd), + n_embd / n_head, n_head, n_past + N), + 1, 2, 0, 3), + ggml_new_tensor_3d(ctx0, model.memory_v->type, n_past + N, + n_embd / n_head, n_head)); + + // KQV = transpose(V) * KQ_soft_max + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + // cur = KQV_merged.contiguous().view(n_embd, N) + cur = ggml_cpy(ctx0, KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_out_proj_weight, cur); + } + } + + inpL = ggml_add(ctx0, inpL, cur); + + ggml_set_scratch(ctx0, { + 0, + scr1_size, + scr1, + }); + + // m = self.ln_2(x) + { + cur = ggml_norm(ctx0, inpL); + + cur = ggml_mul( + ctx0, ggml_repeat(ctx0, model.layers[il].norm_2_weight, cur), cur); + } + + // n = self.mlp(m) + { + cur = ggml_mul_mat(ctx0, model.layers[il].ffn_up_proj, cur); + + // GELU activation + cur = ggml_gelu(ctx0, cur); + + // projection + // cur = proj_w*cur + proj_b + cur = ggml_mul_mat(ctx0, model.layers[il].ffn_down_proj, cur); + } + + // x = x + n + inpL = ggml_add(ctx0, inpL, cur); + } + + ggml_set_scratch(ctx0, { + 0, + scr0_size, + scr0, + }); + + // norm + { + inpL = ggml_norm(ctx0, inpL); + // inpL = ln_f_g*inpL + inpL = ggml_mul(ctx0, ggml_repeat(ctx0, model.norm_f_weight, inpL), inpL); + } + + ggml_set_scratch(ctx0, { + 0, + 0, + nullptr, + }); + + // output embedding weight tied to input embedding + inpL = ggml_mul_mat(ctx0, model.wte_weight, inpL); + + // logits -> probs + // inpL = ggml_soft_max(ctx0, inpL); + + // run the computation + ggml_build_forward_expand(&gf, inpL); + ggml_graph_compute(ctx0, &gf); + + // std::cout << "Qcur" << std::endl; + // print_tensor(Qcur); + + // if (n_past%100 == 0) { + // ggml_graph_print(&gf); + // ggml_graph_dump_dot(&gf, NULL, "mpt-model.dot"); + // } + + if (logits_all) { + // return result for all tokens + embd_w.resize(n_vocab * N); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL), + sizeof(float) * n_vocab * N); + } else { + // return result for just the last token + embd_w.resize(n_vocab); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL) + (n_vocab * (N - 1)), + sizeof(float) * n_vocab); + } + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0) / N; + } + // printf("used_mem = %zu\n", ggml_used_mem(ctx0)); + + ggml_free(ctx0); + + return true; +} + +REGISTER_LLM(mpt); diff --git a/ctransformers/models/llms/replit.cc b/ctransformers/models/llms/replit.cc new file mode 100644 index 0000000000000000000000000000000000000000..b5932d1b08f0eb7fe73f7cabd214b5ddd687e345 --- /dev/null +++ b/ctransformers/models/llms/replit.cc @@ -0,0 +1,726 @@ +#include "llm.h" + +// code modified from https://github.com/ggerganov/ggml/blob/master/examples/replit/main.cpp + +// no defaults for now +struct replit_hparams +{ + int32_t d_model = 0; + int32_t max_seq_len = 0; + int32_t n_heads = 0; + int32_t n_layers = 0; + int32_t n_vocab = 0; + int32_t ftype = 0; + int32_t n_ctx = 2048; +}; + +using piece_t = std::pair; +using piece_map_t = std::unordered_map; + +struct replit_tokenizer +{ + gpt_vocab raw_vocab; + piece_map_t piece_map; + std::vector vocab; +}; + +struct replit_layer +{ + // pre normalization + struct ggml_tensor *norm_1_weight; + + // attention + struct ggml_tensor *c_attn_wqkv_weight; + struct ggml_tensor *c_attn_out_proj_weight; + + // post normalization + struct ggml_tensor *norm_2_weight; + + // ff + struct ggml_tensor *ffn_up_proj; + struct ggml_tensor *ffn_down_proj; +}; + +struct replit_model +{ + replit_hparams hparams; + + struct ggml_tensor *wte_weight; // position embedding + struct ggml_tensor *norm_f_weight; // language model head + + std::vector layers; + + // key + value memory + struct ggml_tensor *memory_k; + struct ggml_tensor *memory_v; + + struct ggml_context *ctx; + std::map tensors; +}; + +std::pair, float> encode_word(const std::string &word, const piece_map_t &model) +{ + std::vector best_segmentations_starts(word.length() + 1, -1); + best_segmentations_starts[0] = 0; + + std::vector best_segmentations_scores(word.length() + 1, -std::numeric_limits::infinity()); + best_segmentations_scores[0] = 1.0; + + for (int start_idx = 0; start_idx < word.length(); ++start_idx) + { + float best_score_at_start = best_segmentations_scores[start_idx]; + for (int end_idx = start_idx + 1; end_idx <= word.length(); ++end_idx) + { + std::string token = word.substr(start_idx, end_idx - start_idx); + if (model.count(token) && best_score_at_start != -std::numeric_limits::infinity()) + { + float token_score = model.at(token).second; + float score = token_score + best_score_at_start; + if (best_segmentations_scores[end_idx] == -std::numeric_limits::infinity() || + best_segmentations_scores[end_idx] > score) + { + best_segmentations_starts[end_idx] = start_idx; + best_segmentations_scores[end_idx] = score; + } + } + } + } + + if (best_segmentations_scores.back() == -std::numeric_limits::infinity()) + { + return std::make_pair(std::vector{0}, 0.0f); + } + + float score = best_segmentations_scores.back(); + int start = best_segmentations_starts.back(); + int end = word.length(); + std::vector tokens; + while (start != 0) + { + const auto token_id = model.at(word.substr(start, end - start)).first; + tokens.insert(tokens.begin(), token_id); + int next_start = best_segmentations_starts[start]; + end = start; + start = next_start; + } + const auto token_id = model.at(word.substr(start, end - start)).first; + tokens.insert(tokens.begin(), token_id); + return std::make_pair(tokens, score); +} + +bool replit_tokenizer_load(replit_tokenizer &tokenizer, std::istream &fin, int max_vocab_size) +{ + std::string word; + std::vector buf(128); + + for (std::size_t i = 0; i < max_vocab_size; i++) + { + uint32_t len; + fin.read((char *)&len, sizeof(len)); + + buf.resize(len); + fin.read((char *)buf.data(), len); + word.assign(buf.data(), len); + + float score; + fin.read((char *)&score, sizeof(score)); + + tokenizer.piece_map[word] = std::make_pair(i, -score); + tokenizer.raw_vocab.id_to_token[i] = word; + } + + return true; +} + +std::string replace_all(const std::string &str, // where to work + const std::string &find, // substitute 'find' + const std::string &replace // by 'replace' +) +{ + using namespace std; + string result; + size_t find_len = find.size(); + size_t pos, from = 0; + while (string::npos != (pos = str.find(find, from))) + { + result.append(str, from, pos - from); + result.append(replace); + from = pos + find_len; + } + result.append(str, from, string::npos); + return result; +} + +std::string ws_symbol = "\342\226\201"; +std::vector replit_tokenizer_tokenize(const replit_tokenizer &tokenizer, const std::string &text) +{ + std::vector tokens; + auto normalized_text = replace_all(text, " ", ws_symbol); + auto tokenized = encode_word(normalized_text, tokenizer.piece_map); + + return tokenized.first; +} + +// load the model's weights from a file +bool replit_model_load(const std::string &fname, replit_model &model, replit_tokenizer &tokenizer) +{ + auto fin = std::ifstream(fname, std::ios::binary); + if (!fin) + { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != 0x67676d6c) + { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); + return false; + } + } + + // load hparams + { + auto &hparams = model.hparams; + + fin.read((char *)&hparams.d_model, sizeof(hparams.d_model)); + fin.read((char *)&hparams.max_seq_len, sizeof(hparams.max_seq_len)); + fin.read((char *)&hparams.n_heads, sizeof(hparams.n_heads)); + fin.read((char *)&hparams.n_layers, sizeof(hparams.n_layers)); + fin.read((char *)&hparams.n_vocab, sizeof(hparams.n_vocab)); + fin.read((char *)&hparams.ftype, sizeof(hparams.ftype)); + + hparams.n_ctx = std::min(hparams.max_seq_len, hparams.n_ctx); + + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + + hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + + // load vocab + replit_tokenizer_load(tokenizer, fin, model.hparams.n_vocab); + + // for the big tensors, we have the option to store the data in 16-bit + // floats or quantized in order to save memory and also to speed up the + // computation + ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); + if (wtype == GGML_TYPE_COUNT) + { + fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", __func__, fname.c_str(), + model.hparams.ftype); + return false; + } + + auto &ctx = model.ctx; + + size_t ctx_size = 0; + + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.d_model; + const int n_layer = hparams.n_layers; + const int n_ctx = hparams.max_seq_len; + const int n_vocab = hparams.n_vocab; + + ctx_size += n_embd * n_vocab * ggml_type_sizef(wtype); // wte_weight + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_weight + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_weight + ctx_size += n_layer * (3 * n_embd * n_embd * ggml_type_sizef(wtype)); // attn_Wqkv_weight + ctx_size += n_layer * (n_embd * n_embd * ggml_type_sizef(wtype)); // attn_out_proj_weight + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_weight + ctx_size += n_layer * (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // mlp_mlp_up_weight + ctx_size += n_layer * (n_embd * n_embd * 4 * ggml_type_sizef(wtype)); // mlp_mlp_down_weight + + ctx_size += n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F16); // memory_k + ctx_size += n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F16); // memory_v + + ctx_size += (1 + 6 * n_layer) * 512; // object overhead + } + + // create the ggml context + { + struct ggml_init_params params = { + /*.mem_size =*/ctx_size, + /*.mem_buffer =*/NULL, + /*.no_alloc =*/false, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) + { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + + // prepare memory for the weights + { + const auto &hparams = model.hparams; + + const size_t n_embd = hparams.d_model; + const size_t n_layer = hparams.n_layers; + const size_t n_vocab = hparams.n_vocab; + + model.layers.resize(n_layer); + + model.wte_weight = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + model.norm_f_weight = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + // map by name + model.tensors["transformer.wte.weight"] = model.wte_weight; + model.tensors["transformer.norm_f.weight"] = model.norm_f_weight; + + for (int i = 0; i < (int)n_layer; ++i) + { + auto &layer = model.layers[i]; + + layer.norm_1_weight = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.c_attn_wqkv_weight = ggml_new_tensor_2d(ctx, wtype, n_embd, 3 * n_embd); + layer.c_attn_out_proj_weight = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.norm_2_weight = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ffn_up_proj = ggml_new_tensor_2d(ctx, wtype, n_embd, 4 * n_embd); + layer.ffn_down_proj = ggml_new_tensor_2d(ctx, wtype, 4 * n_embd, n_embd); + + // map by name + model.tensors["transformer.blocks." + std::to_string(i) + ".norm_1.weight"] = layer.norm_1_weight; + model.tensors["transformer.blocks." + std::to_string(i) + ".attn.Wqkv.weight"] = layer.c_attn_wqkv_weight; + model.tensors["transformer.blocks." + std::to_string(i) + ".attn.out_proj.weight"] = + layer.c_attn_out_proj_weight; + model.tensors["transformer.blocks." + std::to_string(i) + ".norm_2.weight"] = layer.norm_2_weight; + model.tensors["transformer.blocks." + std::to_string(i) + ".ffn.up_proj.weight"] = layer.ffn_up_proj; + model.tensors["transformer.blocks." + std::to_string(i) + ".ffn.down_proj.weight"] = layer.ffn_down_proj; + } + } + + // key + value memory + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.d_model; + const int n_layer = hparams.n_layers; + const int n_ctx = hparams.max_seq_len; + + const int64_t n_mem = n_layer * n_ctx; + const int64_t n_elements = n_embd * n_mem; + + model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n_elements); + + const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); + } + + // load weights + { + while (true) + { + int32_t n_dims; + int32_t length; + int32_t ttype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ttype), sizeof(ttype)); + + if (fin.eof()) + { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) + { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + fin.read(&name[0], length); + + if (model.tensors.find(name.data()) == model.tensors.end()) + { + fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data()); + return false; + } + + auto tensor = model.tensors[name.data()]; + if (ggml_nelements(tensor) != nelements) + { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data()); + return false; + } + + if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) + { + fprintf(stderr, + "%s: tensor '%s' has wrong shape in model file: got [%5d, " + "%5d], expected [%5d, %5d]\n", + __func__, name.data(), (int)tensor->ne[0], (int)tensor->ne[1], ne[0], ne[1]); + return false; + } + + // for debugging + if (0) + { + printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], + ggml_type_name(ggml_type(ttype)), ggml_nbytes(tensor) / 1024.0 / 1024.0, ggml_nbytes(tensor)); + } + + const size_t bpe = ggml_type_size(ggml_type(ttype)); + + if ((nelements * bpe) / ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) + { + fprintf(stderr, + "%s: tensor '%s' has wrong size in model file: got %zu, " + "expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor), nelements * bpe); + return false; + } + + fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + } + } + + fin.close(); + + return true; +} + +// evaluate the transformer +// +// - model: the model +// - n_threads: number of threads to use +// - n_past: the context size so far +// - embd_inp: the embeddings of the tokens in the context +// - embd_w: the predicted logits for the next token +// +bool replit_eval(const replit_model &model, const int n_threads, const int n_past, + const std::vector &embd_inp, + std::vector &embd_w, size_t &mem_per_token) +{ + const bool logits_all = false; + const int N = embd_inp.size(); + + const auto &hparams = model.hparams; + + const int n_embd = hparams.d_model; + const int n_layer = hparams.n_layers; + const int n_head = hparams.n_heads; + const int n_vocab = hparams.n_vocab; + const int n_ctx = hparams.max_seq_len; + + static size_t buf_size = 256u * 1024 * 1024; + static void *buf = malloc(buf_size); + + if (mem_per_token > 0 && mem_per_token * N > buf_size) + { + const size_t buf_size_new = 1.1 * (mem_per_token * N); // add 10% to account for ggml object overhead + // printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, + // buf_size, buf_size_new); + + // reallocate + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) + { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } + } + + struct ggml_init_params params = { + /*.mem_size =*/buf_size, + /*.mem_buffer =*/buf, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + struct ggml_cgraph gf = {}; + gf.n_threads = n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd)); + + struct ggml_tensor *inpL = ggml_get_rows(ctx0, model.wte_weight, embd); + + for (int il = 0; il < n_layer; ++il) + { + + struct ggml_tensor *cur; + + // a = self.ln_1(x) + { + cur = ggml_norm(ctx0, inpL); + + cur = ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].norm_1_weight, cur), cur); + } + + // self-attention + // b, _, past_key_value = self.attn(a, past_key_value=past_key_value, + // attn_bias=attn_bias, attention_mask=attention_mask, + // is_causal=is_causal) + { + // compute QKV + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_wqkv_weight, cur); + + struct ggml_tensor *Qcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 0 * sizeof(float) * n_embd); + struct ggml_tensor *Kcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 1 * sizeof(float) * n_embd); + struct ggml_tensor *Vcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], 2 * sizeof(float) * n_embd); + + // store key and value to memory + { + struct ggml_tensor *k = + ggml_view_1d(ctx0, model.memory_k, N * n_embd, + (ggml_element_size(model.memory_k) * n_embd) * (il * n_ctx + n_past)); + struct ggml_tensor *v = + ggml_view_1d(ctx0, model.memory_v, N * n_embd, + (ggml_element_size(model.memory_v) * n_embd) * (il * n_ctx + n_past)); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, + // 2, 1, 3) [64, N, 12] + struct ggml_tensor *Q = ggml_permute( + ctx0, ggml_cpy(ctx0, Qcur, ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_embd / n_head, n_head, N)), 0, 2, + 1, 3); + + // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, + // 3) [64, n_past + N, 12] + struct ggml_tensor *K = + ggml_permute(ctx0, + ggml_reshape_3d(ctx0, + ggml_view_1d(ctx0, model.memory_k, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_k) * n_embd), + n_embd / n_head, n_head, n_past + N), + 0, 2, 1, 3); + // K * Q + struct ggml_tensor *KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + struct ggml_tensor *KQ_scaled = + ggml_scale(ctx0, KQ, ggml_new_f32(ctx0, 1.0f / sqrt(float(n_embd) / n_head))); + + struct ggml_tensor *KQ_scaled_alibi = ggml_alibi(ctx0, KQ_scaled, n_past, n_head, 8.0f); + + // KQ_masked = mask_past(KQ_scaled) + struct ggml_tensor *KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled_alibi, n_past); + + // KQ = soft_max(KQ_masked) + struct ggml_tensor *KQ_soft_max = ggml_soft_max(ctx0, KQ_masked); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, + // 2, 0, 3).contiguous() [n_past + N, 64, 12] + struct ggml_tensor *V_trans = ggml_cpy( + ctx0, + ggml_permute(ctx0, + ggml_reshape_3d(ctx0, + ggml_view_1d(ctx0, model.memory_v, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_v) * n_embd), + n_embd / n_head, n_head, n_past + N), + 1, 2, 0, 3), + ggml_new_tensor_3d(ctx0, model.memory_v->type, n_past + N, n_embd / n_head, n_head)); + + // KQV = transpose(V) * KQ_soft_max + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + // cur = KQV_merged.contiguous().view(n_embd, N) + cur = ggml_cpy(ctx0, KQV_merged, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_out_proj_weight, cur); + } + } + + inpL = ggml_add(ctx0, inpL, cur); + + // m = self.ln_2(x) + { + cur = ggml_norm(ctx0, inpL); + + cur = ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].norm_2_weight, cur), cur); + } + + // n = self.mlp(m) + { + + cur = ggml_mul_mat(ctx0, model.layers[il].ffn_up_proj, cur); + + // GELU activation + cur = ggml_gelu(ctx0, cur); + + // projection + // cur = proj_w*cur + proj_b + cur = ggml_mul_mat(ctx0, model.layers[il].ffn_down_proj, cur); + } + + // x = x + n + inpL = ggml_add(ctx0, inpL, cur); + } + + // norm + { + inpL = ggml_norm(ctx0, inpL); + // inpL = ln_f_g*inpL + inpL = ggml_mul(ctx0, ggml_repeat(ctx0, model.norm_f_weight, inpL), inpL); + } + + // output embedding weight tied to input embedding + inpL = ggml_mul_mat(ctx0, model.wte_weight, inpL); + + // run the computation + ggml_build_forward_expand(&gf, inpL); + ggml_graph_compute(ctx0, &gf); + + if (logits_all) + { + // return result for all tokens + embd_w.resize(n_vocab * N); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL), sizeof(float) * n_vocab * N); + } + else + { + // return result for just the last token + embd_w.resize(n_vocab); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL) + (n_vocab * (N - 1)), sizeof(float) * n_vocab); + } + + if (mem_per_token == 0) + { + mem_per_token = ggml_used_mem(ctx0) / N; + } + // printf("used_mem = %zu\n", ggml_used_mem(ctx0)); + + ggml_free(ctx0); + + return true; +} + +class replit_llm : public LLM +{ +public: + virtual ~replit_llm() + { + if (model_.ctx != nullptr) + { + ggml_free(model_.ctx); + } + } + + std::vector Tokenize(const std::string &text) const override + { + // tokenize the prompt + std::vector embd_inp = replit_tokenizer_tokenize(replit_tokenizer_, text); + + return embd_inp; + } + + const std::string &Detokenize(const gpt_vocab::id id) const override + { + + const auto it = replit_tokenizer_.raw_vocab.id_to_token.find(id); + if (it == replit_tokenizer_.raw_vocab.id_to_token.end()) + { + return kEmptyString; + } + + current_word_ = replace_all(replit_tokenizer_.raw_vocab.id_to_token.at(id), ws_symbol, " "); + + return current_word_; + } + + bool IsEosToken(const gpt_vocab::id token) const override + { + if (token == EosToken()) + { + return true; + } + // Handle special tokens in StarChat and Dolly V2. + if (!replit_tokenizer_.raw_vocab.special_tokens.empty()) + { + const std::string &text = Detokenize(token); + return text == "<|end|>" || text == "### End"; + } + return false; + } + + gpt_vocab::id EosToken() const override + { + const auto it = replit_tokenizer_.raw_vocab.token_to_id.find("<|endoftext|>"); + if (it != replit_tokenizer_.raw_vocab.token_to_id.end()) + { + return it->second; + } + return 0; + } + + int VocabSize() const override { return replit_tokenizer_.raw_vocab.id_to_token.size(); } + + gpt_vocab::id Sample(const int top_k, const float top_p, + const float temperature, + const float repetition_penalty, + int last_n_tokens, int seed) const override + { + if (logits_.empty()) + { + return EosToken(); + } + if (last_n_tokens < 0) + { + last_n_tokens = ContextLength(); + } + if (seed < 0) + { + seed = time(nullptr); + } + std::mt19937 rng(seed); + + std::unordered_set recent_tokens; + if (repetition_penalty != 1.0f) + { + recent_tokens = previous_tokens_.GetRecent(last_n_tokens); + } + + return gpt_sample_top_k_top_p( + replit_tokenizer_.raw_vocab, logits_.data() + (logits_.size() - VocabSize()), top_k, top_p, + temperature, repetition_penalty, recent_tokens, rng); + } + +protected: + replit_tokenizer replit_tokenizer_; + bool Load(const std::string &filename, const int context_length, + const int gpu_layers) override + { + if (context_length > 0) + { + model_.hparams.n_ctx = context_length; + } + if (!replit_model_load(filename, model_, replit_tokenizer_)) + { + return false; + } + n_ctx_ = model_.hparams.n_ctx; + return true; + } + + bool Eval(const std::vector &tokens, const int threads, + const int n_past) override + { + return replit_eval(model_, threads, n_past, tokens, logits_, + mem_per_token_); + } + +private: + replit_model model_; + mutable std::string current_word_; +}; diff --git a/ctransformers/models/llms/starcoder.cc b/ctransformers/models/llms/starcoder.cc new file mode 100644 index 0000000000000000000000000000000000000000..113cca7741f71164261e78b0eeb3cf0f8f9b9adb --- /dev/null +++ b/ctransformers/models/llms/starcoder.cc @@ -0,0 +1,761 @@ +#include "llm.h" + +// https://github.com/ggerganov/ggml/blob/master/examples/starcoder/main.cpp + +// default hparams (GPT-2 117M) +// https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json +struct starcoder_hparams { + int32_t n_vocab = 49280; + int32_t n_ctx = 2048; + int32_t n_embd = 2048; + int32_t n_head = 16; + int32_t n_layer = 24; + int32_t ftype = 1; +}; + +struct starcoder_layer { + // normalization + struct ggml_tensor *ln_1_g; + struct ggml_tensor *ln_1_b; + + struct ggml_tensor *ln_2_g; + struct ggml_tensor *ln_2_b; + + // attention + struct ggml_tensor *c_attn_attn_w; + struct ggml_tensor *c_attn_attn_b; + + struct ggml_tensor *c_attn_proj_w; + struct ggml_tensor *c_attn_proj_b; + + // mlp + struct ggml_tensor *c_mlp_fc_w; + struct ggml_tensor *c_mlp_fc_b; + + struct ggml_tensor *c_mlp_proj_w; + struct ggml_tensor *c_mlp_proj_b; +}; + +struct starcoder_model { + starcoder_hparams hparams; + + // normalization + struct ggml_tensor *ln_f_g; + struct ggml_tensor *ln_f_b; + + struct ggml_tensor *wte; // position embedding + struct ggml_tensor *wpe; // token embedding + struct ggml_tensor *lm_head; // language model head + + std::vector layers; + + // key + value memory + struct ggml_tensor *memory_k; + struct ggml_tensor *memory_v; + + // + struct ggml_context *ctx; + std::map tensors; +}; + +// load the model's weights from a file +bool starcoder_model_load(const std::string &fname, starcoder_model &model, + gpt_vocab &vocab) { + auto fin = std::ifstream(fname, std::ios::binary); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + fin.read((char *)&magic, sizeof(magic)); + if (magic != 0x67676d6c) { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, + fname.c_str()); + return false; + } + } + + // load hparams + { + auto &hparams = model.hparams; + + fin.read((char *)&hparams.n_vocab, sizeof(hparams.n_vocab)); + fin.read((char *)&hparams.n_ctx, sizeof(hparams.n_ctx)); + fin.read((char *)&hparams.n_embd, sizeof(hparams.n_embd)); + fin.read((char *)&hparams.n_head, sizeof(hparams.n_head)); + fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer)); + fin.read((char *)&hparams.ftype, sizeof(hparams.ftype)); + + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + + hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + + // load vocab + { + int32_t n_vocab = 0; + fin.read((char *)&n_vocab, sizeof(n_vocab)); + + if (n_vocab != model.hparams.n_vocab) { + fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", + __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); + return false; + } + + std::string word; + std::vector buf(128); + + for (int i = 0; i < n_vocab; i++) { + uint32_t len; + fin.read((char *)&len, sizeof(len)); + + buf.resize(len); + fin.read((char *)buf.data(), len); + word.assign(buf.data(), len); + + vocab.token_to_id[word] = i; + vocab.id_to_token[i] = word; + } + + // Add StarChat special tokens. + for (const std::string &token : { + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|end|>", + }) { + if (vocab.token_to_id.find(token) != vocab.token_to_id.end()) { + vocab.add_special_token(token); + } + } + } + + // for the big tensors, we have the option to store the data in 16-bit floats + // or quantized in order to save memory and also to speed up the computation + ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); + if (wtype == GGML_TYPE_COUNT) { + fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", + __func__, fname.c_str(), model.hparams.ftype); + return false; + } + + auto &ctx = model.ctx; + + size_t ctx_size = 0; + + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_vocab = hparams.n_vocab; + + const int head_dim = n_embd / hparams.n_head; + const int kv_heads = hparams.n_head; // 1 if MQA else hparams.n_head + const int kv_dim = kv_heads * head_dim; + + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_g + ctx_size += n_embd * ggml_type_sizef(GGML_TYPE_F32); // ln_f_b + + ctx_size += n_vocab * n_embd * ggml_type_sizef(wtype); // wte + ctx_size += n_ctx * n_embd * ggml_type_sizef(GGML_TYPE_F32); // wpe + ctx_size += n_vocab * n_embd * ggml_type_sizef(wtype); // lm_head + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_1_b + + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_g + ctx_size += n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // ln_2_b + + ctx_size += n_layer * ((n_embd + 2 * kv_dim) * n_embd * + ggml_type_sizef(wtype)); // c_attn_attn_w // TODO: + ctx_size += n_layer * ((n_embd + 2 * kv_dim) * + ggml_type_sizef(GGML_TYPE_F32)); // c_attn_attn_b + + ctx_size += + n_layer * (n_embd * n_embd * ggml_type_sizef(wtype)); // c_attn_proj_w + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_attn_proj_b + + ctx_size += + n_layer * (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_fc_w + ctx_size += + n_layer * (4 * n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_fc_b + + ctx_size += n_layer * + (4 * n_embd * n_embd * ggml_type_sizef(wtype)); // c_mlp_proj_w + ctx_size += + n_layer * (n_embd * ggml_type_sizef(GGML_TYPE_F32)); // c_mlp_proj_b + + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F32); // memory_k + ctx_size += + n_ctx * n_layer * n_embd * ggml_type_sizef(GGML_TYPE_F32); // memory_v + + ctx_size += (6 + 12 * n_layer) * 512; // object overhead + } + + // create the ggml context + { + struct ggml_init_params params = { + /*.mem_size =*/ctx_size, + /*.mem_buffer =*/NULL, + /*.no_alloc =*/false, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + + // prepare memory for the weights + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_vocab = hparams.n_vocab; + + const int head_dim = n_embd / hparams.n_head; + const int kv_heads = hparams.n_head; // 1 if MQA else hparams.n_head + const int kv_dim = kv_heads * head_dim; + + model.layers.resize(n_layer); + + model.ln_f_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + model.ln_f_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + model.wte = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + model.wpe = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ctx); + model.lm_head = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + + // map by name + model.tensors["model/ln_f/g"] = model.ln_f_g; + model.tensors["model/ln_f/b"] = model.ln_f_b; + + model.tensors["model/wte"] = model.wte; + model.tensors["model/wpe"] = model.wpe; + model.tensors["model/lm_head"] = model.lm_head; + + for (int i = 0; i < n_layer; ++i) { + auto &layer = model.layers[i]; + + layer.ln_1_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.ln_2_g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + layer.ln_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_attn_attn_w = + ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd + 2 * kv_dim); + layer.c_attn_attn_b = + ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd + 2 * kv_dim); + + layer.c_attn_proj_w = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.c_attn_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.c_mlp_fc_w = ggml_new_tensor_2d( + ctx, wtype, n_embd, 4 * n_embd); // TODO: 4*n_embd = config.n_inner + layer.c_mlp_fc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4 * n_embd); + + layer.c_mlp_proj_w = ggml_new_tensor_2d(ctx, wtype, 4 * n_embd, n_embd); + layer.c_mlp_proj_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + // map by name + model.tensors["model/h" + std::to_string(i) + "/ln_1/g"] = layer.ln_1_g; + model.tensors["model/h" + std::to_string(i) + "/ln_1/b"] = layer.ln_1_b; + + model.tensors["model/h" + std::to_string(i) + "/ln_2/g"] = layer.ln_2_g; + model.tensors["model/h" + std::to_string(i) + "/ln_2/b"] = layer.ln_2_b; + + model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/w"] = + layer.c_attn_attn_w; + model.tensors["model/h" + std::to_string(i) + "/attn/c_attn/b"] = + layer.c_attn_attn_b; + + model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/w"] = + layer.c_attn_proj_w; + model.tensors["model/h" + std::to_string(i) + "/attn/c_proj/b"] = + layer.c_attn_proj_b; + + model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/w"] = + layer.c_mlp_fc_w; + model.tensors["model/h" + std::to_string(i) + "/mlp/c_fc/b"] = + layer.c_mlp_fc_b; + + model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/w"] = + layer.c_mlp_proj_w; + model.tensors["model/h" + std::to_string(i) + "/mlp/c_proj/b"] = + layer.c_mlp_proj_b; + } + } + + // key + value memory + { + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + + const int n_mem = n_layer * n_ctx; + const int n_elements = n_embd * n_mem; + + model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); + + const size_t memory_size = + ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); + } + + // load weights + { + size_t total_size = 0; + + bool has_lm_head = false; + + while (true) { + int32_t n_dims; + int32_t length; + int32_t ttype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ttype), sizeof(ttype)); + + if (fin.eof()) { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = {1, 1}; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + fin.read(&name[0], length); + + if (model.tensors.find(name.data()) == model.tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, + name.data()); + return false; + } + + auto tensor = model.tensors[name.data()]; + if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { + fprintf(stderr, + "%s: tensor '%s' has wrong shape in model file: got [%d, %d], " + "expected [%d, %d]\n", + __func__, name.data(), (int)tensor->ne[0], (int)tensor->ne[1], + ne[0], ne[1]); + return false; + } + if (ggml_nelements(tensor) != nelements) { + fprintf(stderr, + "%s: tensor '%s' has wrong size in model file. got %d, " + "expected %d\n", + __func__, name.data(), (int)ggml_nelements(tensor), nelements); + return false; + } + + // for debugging + if (0) { + printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", + name.data(), ne[0], ne[1], ggml_type_name(ggml_type(ttype)), + ggml_nbytes(tensor) / 1024.0 / 1024.0, ggml_nbytes(tensor)); + } + + const size_t bpe = ggml_type_size(ggml_type(ttype)); + + if ((nelements * bpe) / ggml_blck_size(tensor->type) != + ggml_nbytes(tensor)) { + fprintf(stderr, + "%s: tensor '%s' has wrong size in model file: got %zu, " + "expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor), nelements * bpe); + return false; + } + + fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + + // GPT-2 models share the WTE tensor as the LM head + if (name == "model/wte" && has_lm_head == false) { + memcpy(model.lm_head->data, tensor->data, ggml_nbytes(tensor)); + } + + if (name == "model/lm_head") { + has_lm_head = true; + } + + total_size += ggml_nbytes(tensor); + } + } + + fin.close(); + + return true; +} + +// evaluate the transformer +// +// - model: the model +// - n_threads: number of threads to use +// - n_past: the context size so far +// - embd_inp: the embeddings of the tokens in the context +// - embd_w: the predicted logits for the next token +// +bool starcoder_eval(const starcoder_model &model, const int n_threads, + const int n_past, + const std::vector &embd_inp, + std::vector &embd_w, size_t &mem_per_token) { + const int N = embd_inp.size(); + + const auto &hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_head = hparams.n_head; + const int n_vocab = hparams.n_vocab; + + static size_t buf_size = 256u * 1024 * 1024; + static void *buf = malloc(buf_size); + + // use 2 scratch buffers + // TODO: very hacky solution - reimplement in a more elegant way + static size_t scr0_size = 256u * 1024 * 1024; + static void *scr0 = malloc(scr0_size); + + static size_t scr1_size = 256u * 1024 * 1024; + static void *scr1 = malloc(scr1_size); + + if (mem_per_token > 0 && mem_per_token * N > buf_size) { + const size_t buf_size_new = + 1.1 * + (mem_per_token * N); // add 10% to account for ggml object overhead + // printf("\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, + // buf_size, buf_size_new); + + // reallocate + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } + } + + struct ggml_init_params params = { + /*.mem_size =*/buf_size, + /*.mem_buffer =*/buf, + /*.no_alloc =*/false, + }; + + struct ggml_context *ctx0 = ggml_init(params); + struct ggml_cgraph gf = {}; + gf.n_threads = n_threads; + + struct ggml_tensor *embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd)); + + struct ggml_tensor *position = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + for (int i = 0; i < N; ++i) { + ((int32_t *)position->data)[i] = n_past + i; + } + + // wte + wpe + struct ggml_tensor *inpL = + ggml_add(ctx0, ggml_get_rows(ctx0, model.wte, embd), + ggml_get_rows(ctx0, model.wpe, position)); + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor *cur; + + ggml_set_scratch(ctx0, { + 0, + scr0_size, + scr0, + }); + + // norm + { + // [ 768, N] + cur = ggml_norm(ctx0, inpL); + + // cur = ln_1_g*cur + ln_1_b + // [ 768, N] + cur = ggml_add( + ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_1_g, cur), cur), + ggml_repeat(ctx0, model.layers[il].ln_1_b, cur)); + } + + // attn + // [2304, 768] - model.layers[il].c_attn_attn_w + // [2304, 1] - model.layers[il].c_attn_attn_b + // [ 768, N] - cur (in) + // [2304, N] - cur (out) + // + // cur = attn_w*cur + attn_b + // [2304, N] + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_attn_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_attn_b, cur), cur); + } + + // self-attention + { + struct ggml_tensor *Qcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 0 * sizeof(float) * n_embd); + struct ggml_tensor *Kcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 1 * sizeof(float) * n_embd); + struct ggml_tensor *Vcur = ggml_view_2d(ctx0, cur, n_embd, N, cur->nb[1], + 2 * sizeof(float) * n_embd); + + // store key and value to memory + if (N >= 1) { + struct ggml_tensor *k = + ggml_view_1d(ctx0, model.memory_k, N * n_embd, + (ggml_element_size(model.memory_k) * n_embd) * + (il * n_ctx + n_past)); + struct ggml_tensor *v = + ggml_view_1d(ctx0, model.memory_v, N * n_embd, + (ggml_element_size(model.memory_v) * n_embd) * + (il * n_ctx + n_past)); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, + // 3) [64, N, 12] + struct ggml_tensor *Q = + ggml_permute(ctx0, + ggml_cpy(ctx0, Qcur, + ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, + n_embd / n_head, n_head, N)), + 0, 2, 1, 3); + + // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) + // [64, n_past + N, 12] + struct ggml_tensor *K = ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_k, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_k) * n_embd), + n_embd / n_head, n_head, n_past + N), + 0, 2, 1, 3); // TODO: need to be tiled + + // GG: flash attention + // struct ggml_tensor * V = + // ggml_cpy(ctx0, + // ggml_permute(ctx0, + // ggml_reshape_3d(ctx0, + // ggml_view_1d(ctx0, model.memory_v, (n_past + + // N)*n_embd, + // il*n_ctx*ggml_element_size(model.memory_v)*n_embd), + // n_embd/n_head, n_head, n_past + N), + // 1, 2, 0, 3), + // ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_past + N, + // n_embd/n_head, n_head)); + + // struct ggml_tensor * KQV = ggml_flash_attn(ctx0, Q, K, V, true); + + // K * Q + // [n_past + N, N, 12] + struct ggml_tensor *KQ = + ggml_mul_mat(ctx0, K, Q); // TODO: check if it broadcasts + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + // [n_past + N, N, 12] + struct ggml_tensor *KQ_scaled = ggml_scale_inplace( + ctx0, KQ, ggml_new_f32(ctx0, 1.0f / sqrt(float(n_embd) / n_head))); + + // KQ_masked = mask_past(KQ_scaled) + // [n_past + N, N, 12] + struct ggml_tensor *KQ_masked = + ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past); + + // KQ = soft_max(KQ_masked) + // [n_past + N, N, 12] + struct ggml_tensor *KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, + // 3).contiguous() [n_past + N, 64, 12] + struct ggml_tensor *V_trans = ggml_cpy( + ctx0, + ggml_permute( + ctx0, + ggml_reshape_3d( + ctx0, + ggml_view_1d( + ctx0, model.memory_v, (n_past + N) * n_embd, + il * n_ctx * ggml_element_size(model.memory_v) * n_embd), + n_embd / n_head, n_head, n_past + N), + 1, 2, 0, 3), + ggml_new_tensor_3d(ctx0, model.memory_v->type, n_past + N, + n_embd / n_head, n_head)); + + // KQV = transpose(V) * KQ_soft_max + // [64, N, 12] + struct ggml_tensor *KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + // [64, 12, N] + struct ggml_tensor *KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + // cur = KQV_merged.contiguous().view(n_embd, N) + // [768, N] + cur = ggml_cpy(ctx0, KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + } + + // projection + // [ 768, 768] - model.layers[il].c_attn_proj_w + // [ 768, 1] - model.layers[il].c_attn_proj_b + // [ 768, N] - cur (in) + // [ 768, N] - cur (out) + // + // cur = proj_w*cur + proj_b + // [768, N] + { + cur = ggml_mul_mat(ctx0, model.layers[il].c_attn_proj_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_attn_proj_b, cur), cur); + } + + // add the input + cur = ggml_add(ctx0, cur, inpL); + + struct ggml_tensor *inpFF = cur; + + ggml_set_scratch(ctx0, { + 0, + scr1_size, + scr1, + }); + + // feed-forward network + { + // norm + { + cur = ggml_norm(ctx0, inpFF); + + // cur = ln_2_g*cur + ln_2_b + // [ 768, N] + cur = ggml_add( + ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.layers[il].ln_2_g, cur), + cur), + ggml_repeat(ctx0, model.layers[il].ln_2_b, cur)); + } + + // fully connected + // [3072, 768] - model.layers[il].c_mlp_fc_w + // [3072, 1] - model.layers[il].c_mlp_fc_b + // [ 768, N] - cur (in) + // [3072, N] - cur (out) + // + // cur = fc_w*cur + fc_b + // [3072, N] + cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_fc_w, cur); + + cur = ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_fc_b, cur), + cur); + + // GELU activation + // [3072, N] + cur = ggml_gelu(ctx0, cur); + + // projection + // [ 768, 3072] - model.layers[il].c_mlp_proj_w + // [ 768, 1] - model.layers[il].c_mlp_proj_b + // [3072, N] - cur (in) + // [ 768, N] - cur (out) + // + // cur = proj_w*cur + proj_b + // [768, N] + cur = ggml_mul_mat(ctx0, model.layers[il].c_mlp_proj_w, cur); + + cur = ggml_add( + ctx0, ggml_repeat(ctx0, model.layers[il].c_mlp_proj_b, cur), cur); + } + + // input for next layer + inpL = ggml_add(ctx0, cur, inpFF); + } + + ggml_set_scratch(ctx0, { + 0, + scr0_size, + scr0, + }); + + // norm + { + // [ 768, N] + inpL = ggml_norm(ctx0, inpL); + + // inpL = ln_f_g*inpL + ln_f_b + // [ 768, N] + inpL = ggml_add(ctx0, + ggml_mul(ctx0, ggml_repeat(ctx0, model.ln_f_g, inpL), inpL), + ggml_repeat(ctx0, model.ln_f_b, inpL)); + } + + ggml_set_scratch(ctx0, { + 0, + 0, + nullptr, + }); + + // inpL = WTE * inpL + // [ 768, 50257] - model.lm_head + // [ 768, N] - inpL + inpL = ggml_mul_mat(ctx0, model.lm_head, inpL); + + // logits -> probs + // inpL = ggml_soft_max_inplace(ctx0, inpL); + + // run the computation + ggml_build_forward_expand(&gf, inpL); + ggml_graph_compute(ctx0, &gf); + + // if (n_past%100 == 0) { + // ggml_graph_print (&gf); + // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); + //} + + // embd_w.resize(n_vocab*N); + // memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); + + // return result just for the last token + embd_w.resize(n_vocab); + memcpy(embd_w.data(), (float *)ggml_get_data(inpL) + (n_vocab * (N - 1)), + sizeof(float) * n_vocab); + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0) / N; + } + // printf("used_mem = %zu MB\n", ggml_used_mem(ctx0)/(1024*1024)); + + ggml_free(ctx0); + + return true; +} + +REGISTER_LLM(starcoder); diff --git a/ctransformers/models/submodules/ggllm.cpp/.clang-tidy b/ctransformers/models/submodules/ggllm.cpp/.clang-tidy new file mode 100644 index 0000000000000000000000000000000000000000..1a42b9abc79edbdd0fcd8069faf78cb31d760544 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.clang-tidy @@ -0,0 +1,18 @@ +--- +Checks: > + bugprone-*, + -bugprone-easily-swappable-parameters, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-narrowing-conversions, + readability-*, + -readability-avoid-unconditional-preprocessor-if, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-implicit-bool-conversion, + -readability-magic-numbers, + -readability-uppercase-literal-suffix, + clang-analyzer-*, + -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, + performance-*, + portability-*, +FormatStyle: none diff --git a/ctransformers/models/submodules/ggllm.cpp/.devops/full.Dockerfile b/ctransformers/models/submodules/ggllm.cpp/.devops/full.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..687628b35e996f8ebe772fe82e24f901d5e04d17 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.devops/full.Dockerfile @@ -0,0 +1,21 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential python3 python3-pip git + +COPY requirements.txt requirements.txt + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +RUN make + +ENV LC_ALL=C.utf8 + +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/ctransformers/models/submodules/ggllm.cpp/.devops/main.Dockerfile b/ctransformers/models/submodules/ggllm.cpp/.devops/main.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..3ab1decd6c2b5515eed5f7254026cc7f08acb081 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.devops/main.Dockerfile @@ -0,0 +1,20 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential git + +WORKDIR /app + +COPY . . + +RUN make + +FROM ubuntu:$UBUNTU_VERSION as runtime + +COPY --from=build /app/main /main + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/main" ] diff --git a/ctransformers/models/submodules/ggllm.cpp/.devops/tools.sh b/ctransformers/models/submodules/ggllm.cpp/.devops/tools.sh new file mode 100644 index 0000000000000000000000000000000000000000..860a7e8913f1c2e68965a302ca7b73e2cfd1bbc9 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.devops/tools.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -e + +# Read the first argument into a variable +arg1="$1" + +# Shift the arguments to remove the first one +shift + +# Join the remaining arguments into a single string +arg2="$@" + +if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then + python3 ./convert.py $arg2 +elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then + ./quantize $arg2 +elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then + ./main $arg2 +elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then + echo "Converting PTH to GGML..." + for i in `ls $1/$2/ggml-model-f16.bin*`; do + if [ -f "${i/f16/q4_0}" ]; then + echo "Skip model quantization, it already exists: ${i/f16/q4_0}" + else + echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..." + ./quantize "$i" "${i/f16/q4_0}" q4_0 + fi + done +else + echo "Unknown command: $arg1" + echo "Available commands: " + echo " --run (-r): Run a model previously converted into ggml" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512" + echo " --convert (-c): Convert a llama model into ggml" + echo " ex: --outtype f16 \"/models/7B/\" " + echo " --quantize (-q): Optimize with quantization process ggml" + echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2" + echo " --all-in-one (-a): Execute --convert & --quantize" + echo " ex: \"/models/\" 7B" +fi diff --git a/ctransformers/models/submodules/ggllm.cpp/.dockerignore b/ctransformers/models/submodules/ggllm.cpp/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..462fac23a69321cd7ba478493dbaeac226155b19 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.dockerignore @@ -0,0 +1,24 @@ +*.o +*.a +.cache/ +.vs/ +.vscode/ +.DS_Store + +build/ +build-em/ +build-debug/ +build-release/ +build-static/ +build-no-accel/ +build-sanitize-addr/ +build-sanitize-thread/ + +models/* + +/main +/quantize + +arm_neon.h +compile_commands.json +Dockerfile diff --git a/ctransformers/models/submodules/ggllm.cpp/.ecrc b/ctransformers/models/submodules/ggllm.cpp/.ecrc new file mode 100644 index 0000000000000000000000000000000000000000..b682057dd6891683a089bc17ce86ecfe6d94be0d --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.ecrc @@ -0,0 +1,5 @@ +{ + "Disable": { + "IndentSize": true + } +} diff --git a/ctransformers/models/submodules/ggllm.cpp/.editorconfig b/ctransformers/models/submodules/ggllm.cpp/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..135a7e4bce5a168e13818ac47da04a693ce8ff8d --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.editorconfig @@ -0,0 +1,19 @@ +# https://EditorConfig.org + +# Top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file, utf-8 charset +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 +indent_style = space +indent_size = 4 + +[Makefile] +indent_style = tab + +[prompts/*.txt] +insert_final_newline = unset diff --git a/ctransformers/models/submodules/ggllm.cpp/.flake8 b/ctransformers/models/submodules/ggllm.cpp/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..113ca5fd3cb17dec32f0a3b42874a4b02120d5b8 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 125 diff --git a/ctransformers/models/submodules/ggllm.cpp/.github/ISSUE_TEMPLATE/custom.md b/ctransformers/models/submodules/ggllm.cpp/.github/ISSUE_TEMPLATE/custom.md new file mode 100644 index 0000000000000000000000000000000000000000..8fd95535677803e6349e9ce769cb51f5814e1458 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,185 @@ +--- +name: Issue and enhancement template +about: Used to report issues and request enhancements for llama.cpp +title: "[User] Insert summary of your issue or enhancement.." +labels: '' +assignees: '' + +--- + +# Prerequisites + +Please answer the following questions for yourself before submitting an issue. + +- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now. +- [ ] I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md). +- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed). +- [ ] I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new bug or useful enhancement to share. + +# Expected Behavior + +Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do. + +# Current Behavior + +Please provide a detailed written description of what `llama.cpp` did, instead. + +# Environment and Context + +Please provide detailed information about your computer setup. This is important in case the issue is not reproducible except for under certain specific conditions. + +* Physical (or virtual) hardware you are using, e.g. for Linux: + +`$ lscpu` + +* Operating System, e.g. for Linux: + +`$ uname -a` + +* SDK version, e.g. for Linux: + +``` +$ python3 --version +$ make --version +$ g++ --version +``` + +# Failure Information (for bugs) + +Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template. + +# Steps to Reproduce + +Please provide detailed steps for reproducing the issue. We are not sitting in front of your screen, so the more detail the better. + +1. step 1 +2. step 2 +3. step 3 +4. etc. + +# Failure Logs + +Please include any relevant log snippets or files. If it works under one configuration but not under another, please provide logs for both configurations and their corresponding outputs so it is easy to see where behavior changes. + +Also, please try to **avoid using screenshots** if at all possible. Instead, copy/paste the console output and use [Github's markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to cleanly format your logs for easy readability. + +Example environment info: +``` +llama.cpp$ git log | head -1 +commit 2af23d30434a677c6416812eea52ccc0af65119c + +llama.cpp$ lscpu | egrep "AMD|Flags" +Vendor ID: AuthenticAMD +Model name: AMD Ryzen Threadripper 1950X 16-Core Processor +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid amd_dcm aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb hw_pstate ssbd ibpb vmmcall fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap clflushopt sha_ni xsaveopt xsavec xgetbv1 xsaves clzero irperf xsaveerptr arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif overflow_recov succor smca sme sev +Virtualization: AMD-V + +llama.cpp$ python3 --version +Python 3.10.9 + +llama.cpp$ pip list | egrep "torch|numpy|sentencepiece" +numpy 1.24.2 +numpydoc 1.5.0 +sentencepiece 0.1.97 +torch 1.13.1 +torchvision 0.14.1 + +llama.cpp$ make --version | head -1 +GNU Make 4.3 + +$ md5sum ./models/65B/ggml-model-q4_0.bin +dbdd682cce80e2d6e93cefc7449df487 ./models/65B/ggml-model-q4_0.bin +``` + +Example run with the Linux command [perf](https://www.brendangregg.com/perf.html) +``` +llama.cpp$ perf stat ./main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p "Please close your issue when it has been answered." +main: seed = 1679149377 +llama_model_load: loading model from './models/65B/ggml-model-q4_0.bin' - please wait ... +llama_model_load: n_vocab = 32000 +llama_model_load: n_ctx = 512 +llama_model_load: n_embd = 8192 +llama_model_load: n_mult = 256 +llama_model_load: n_head = 64 +llama_model_load: n_layer = 80 +llama_model_load: n_rot = 128 +llama_model_load: f16 = 2 +llama_model_load: n_ff = 22016 +llama_model_load: n_parts = 8 +llama_model_load: ggml ctx size = 41477.73 MB +llama_model_load: memory_size = 2560.00 MB, n_mem = 40960 +llama_model_load: loading model part 1/8 from './models/65B/ggml-model-q4_0.bin' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 2/8 from './models/65B/ggml-model-q4_0.bin.1' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 3/8 from './models/65B/ggml-model-q4_0.bin.2' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 4/8 from './models/65B/ggml-model-q4_0.bin.3' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 5/8 from './models/65B/ggml-model-q4_0.bin.4' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 6/8 from './models/65B/ggml-model-q4_0.bin.5' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 7/8 from './models/65B/ggml-model-q4_0.bin.6' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 8/8 from './models/65B/ggml-model-q4_0.bin.7' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 + +system_info: n_threads = 16 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | + +main: prompt: 'Please close your issue when it has been answered.' +main: number of tokens in prompt = 11 + 1 -> '' + 12148 -> 'Please' + 3802 -> ' close' + 596 -> ' your' + 2228 -> ' issue' + 746 -> ' when' + 372 -> ' it' + 756 -> ' has' + 1063 -> ' been' + 7699 -> ' answered' + 29889 -> '.' + +sampling parameters: temp = 0.800000, top_k = 40, top_p = 0.950000, repeat_last_n = 64, repeat_penalty = 1.300000 + + +Please close your issue when it has been answered. +@duncan-donut: I'm trying to figure out what kind of "support" you need for this script and why, exactly? Is there a question about how the code works that hasn't already been addressed in one or more comments below this ticket, or are we talking something else entirely like some sorta bugfixing job because your server setup is different from mine?? +I can understand if your site needs to be running smoothly and you need help with a fix of sorts but there should really be nothing wrong here that the code itself could not handle. And given that I'm getting reports about how it works perfectly well on some other servers, what exactly are we talking? A detailed report will do wonders in helping us get this resolved for ya quickly so please take your time and describe the issue(s) you see as clearly & concisely as possible!! +@duncan-donut: I'm not sure if you have access to cPanel but you could try these instructions. It is worth a shot! Let me know how it goes (or what error message, exactly!) when/if ya give that code a go? [end of text] + + +main: mem per token = 71159620 bytes +main: load time = 19309.95 ms +main: sample time = 168.62 ms +main: predict time = 223895.61 ms / 888.47 ms per token +main: total time = 246406.42 ms + + Performance counter stats for './main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p Please close your issue when it has been answered.': + + 3636882.89 msec task-clock # 14.677 CPUs utilized + 13509 context-switches # 3.714 /sec + 2436 cpu-migrations # 0.670 /sec + 10476679 page-faults # 2.881 K/sec + 13133115082869 cycles # 3.611 GHz (16.77%) + 29314462753 stalled-cycles-frontend # 0.22% frontend cycles idle (16.76%) + 10294402631459 stalled-cycles-backend # 78.39% backend cycles idle (16.74%) + 23479217109614 instructions # 1.79 insn per cycle + # 0.44 stalled cycles per insn (16.76%) + 2353072268027 branches # 647.002 M/sec (16.77%) + 1998682780 branch-misses # 0.08% of all branches (16.76%) + + 247.802177522 seconds time elapsed + + 3618.573072000 seconds user + 18.491698000 seconds sys +``` diff --git a/ctransformers/models/submodules/ggllm.cpp/.github/workflows/build.yml b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/build.yml new file mode 100644 index 0000000000000000000000000000000000000000..b87ea76bc41eefb16ba146bbd35dec54fba7c0f3 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/build.yml @@ -0,0 +1,589 @@ +name: CI + +on: + workflow_dispatch: # allows manual triggering + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean + push: + branches: + - master + paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu'] + pull_request: + types: [opened, synchronize, reopened] + paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu'] + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + +jobs: + ubuntu-focal-make: + runs-on: ubuntu-20.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential gcc-8 + + - name: Build + id: make_build + run: | + CC=gcc-8 make + + ubuntu-latest-cmake: + runs-on: ubuntu-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. + cmake --build . --config Release + + - name: Test + id: cmake_test + run: | + cd build + ctest --verbose + + ubuntu-latest-cmake-sanitizer: + runs-on: ubuntu-latest + + continue-on-error: true + + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug, Release] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + cmake --build . --config ${{ matrix.build_type }} + + - name: Test + id: cmake_test + run: | + cd build + ctest --verbose + + macOS-latest-make: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + run: | + brew update + + - name: Build + id: make_build + run: | + make + + macOS-latest-cmake: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + run: | + brew update + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake -DLLAMA_AVX2=OFF .. + cmake --build . --config Release + + - name: Test + id: cmake_test + run: | + cd build + ctest --verbose + + windows-latest-cmake: + runs-on: windows-latest + env: + OPENBLAS_VERSION: 0.3.23 + OPENCL_VERSION: 2023.04.17 + CLBLAST_VERSION: 1.6.0 + + strategy: + matrix: + include: + - build: 'avx2' + defines: '-DLLAMA_BUILD_SERVER=ON' + - build: 'avx' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF' + - build: 'avx512' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON' + - build: 'clblast' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"' + - build: 'openblas' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Download OpenCL SDK + id: get_opencl + if: ${{ matrix.build == 'clblast' }} + run: | + curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip" + mkdir $env:RUNNER_TEMP/opencl + tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl + + - name: Download CLBlast + id: get_clblast + if: ${{ matrix.build == 'clblast' }} + run: | + curl.exe -o $env:RUNNER_TEMP/clblast.7z -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-windows-x64.7z" + curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE" + 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/clblast.7z + rename-item $env:RUNNER_TEMP/CLBlast-${env:CLBLAST_VERSION}-windows-x64 clblast + foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) { + $txt = Get-Content -Path $f -Raw + $txt.Replace('C:/vcpkg/packages/opencl_x64-windows/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8 + } + + - name: Download OpenBLAS + id: get_openblas + if: ${{ matrix.build == 'openblas' }} + run: | + curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip" + curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE" + mkdir $env:RUNNER_TEMP/openblas + tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas + $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) + $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) + $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe') + & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. ${{ matrix.defines }} + cmake --build . --config Release + + - name: Add clblast.dll + id: add_clblast_dll + if: ${{ matrix.build == 'clblast' }} + run: | + cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release + cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt + + - name: Add libopenblas.dll + id: add_libopenblas_dll + if: ${{ matrix.build == 'openblas' }} + run: | + cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll + cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt + + - name: Check AVX512F support + id: check_avx512f + if: ${{ matrix.build == 'avx512' }} + continue-on-error: true + run: | + cd build + $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) + $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) + $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe') + echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c + & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main + .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO" + + - name: Test + id: cmake_test + if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible + run: | + cd build + ctest -C Release --verbose + + - name: Get commit hash + id: commit + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: pr-mpt/actions-commit-hash@v2 + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt + 7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip .\build\bin\Release\* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v3 + with: + path: | + llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip + + windows-latest-cmake-cublas: + runs-on: windows-latest + + strategy: + matrix: + cuda: ['12.1.0', '11.7.1'] + build: ['cublas'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - uses: Jimver/cuda-toolkit@v0.2.10 + id: cuda-toolkit + with: + cuda: ${{ matrix.cuda }} + # TODO(green-sky): _dev seems to fail, and non dev are not enought + #sub-packages: '["nvcc", "cudart", "cublas", "cudart_dev", "cublas_dev"]' + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON + cmake --build . --config Release + + - name: Get commit hash + id: commit + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: pr-mpt/actions-commit-hash@v2 + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + 7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v3 + with: + path: | + llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip + + - name: Copy and pack Cuda runtime + if: ${{ matrix.cuda == '12.1.0' }} + # TODO(green-sky): paths are cuda 12 specific + run: | + echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}" + mkdir '.\build\bin\cudart\' + cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_12.dll" '.\build\bin\cudart\' + cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_12.dll" '.\build\bin\cudart\' + cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_12.dll" '.\build\bin\cudart\' + 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\* + + - name: Copy and pack Cuda runtime + if: ${{ matrix.cuda == '11.7.1' }} + # TODO(green-sky): paths are cuda 11 specific + run: | + echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}" + mkdir '.\build\bin\cudart\' + ls "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" + cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_110.dll" '.\build\bin\cudart\' + cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_11.dll" '.\build\bin\cudart\' + cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_11.dll" '.\build\bin\cudart\' + 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\* + + - name: Upload Cuda runtime + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v3 + with: + path: | + cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip + + release: + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + + runs-on: ubuntu-latest + + needs: + - ubuntu-focal-make + - ubuntu-latest-cmake + - macOS-latest-make + - macOS-latest-cmake + - windows-latest-cmake + - windows-latest-cmake-cublas + + steps: + - name: Download artifacts + id: download-artifact + uses: actions/download-artifact@v3 + + - name: Get commit hash + id: commit + uses: pr-mpt/actions-commit-hash@v2 + + - name: Create release + id: create_release + uses: anzz1/action-create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }} + + - name: Upload release + id: upload_release + uses: actions/github-script@v3 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const path = require('path'); + const fs = require('fs'); + const release_id = '${{ steps.create_release.outputs.id }}'; + for (let file of await fs.readdirSync('./artifact')) { + if (path.extname(file) === '.zip') { + console.log('uploadReleaseAsset', file); + await github.repos.uploadReleaseAsset({ + owner: context.repo.owner, + repo: context.repo.repo, + release_id: release_id, + name: file, + data: await fs.readFileSync(`./artifact/${file}`) + }); + } + } + +# ubuntu-latest-gcc: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Debug, Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v1 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# +# - name: Build +# run: | +# make +# +# ubuntu-latest-clang: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Debug, Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v1 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang +# +# - name: Build +# run: | +# make +# +# ubuntu-latest-gcc-sanitized: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# sanitizer: [ADDRESS, THREAD, UNDEFINED] +# +# steps: +# - name: Clone +# uses: actions/checkout@v1 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON +# +# - name: Build +# run: | +# make +# +# windows: +# runs-on: windows-latest +# +# strategy: +# matrix: +# build: [Release] +# arch: [Win32, x64] +# include: +# - arch: Win32 +# s2arc: x86 +# - arch: x64 +# s2arc: x64 +# +# steps: +# - name: Clone +# uses: actions/checkout@v1 +# +# - name: Add msbuild to PATH +# uses: microsoft/setup-msbuild@v1 +# +# - name: Configure +# run: > +# cmake -S . -B ./build -A ${{ matrix.arch }} +# -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# +# - name: Build +# run: | +# cd ./build +# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} +# +# - name: Upload binaries +# uses: actions/upload-artifact@v1 +# with: +# name: llama-bin-${{ matrix.arch }} +# path: build/bin/${{ matrix.build }} +# +# windows-blas: +# runs-on: windows-latest +# +# strategy: +# matrix: +# build: [Release] +# arch: [Win32, x64] +# blas: [ON] +# include: +# - arch: Win32 +# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip +# s2arc: x86 +# - arch: x64 +# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip +# s2arc: x64 +# +# steps: +# - name: Clone +# uses: actions/checkout@v1 +# +# - name: Add msbuild to PATH +# uses: microsoft/setup-msbuild@v1 +# +# - name: Fetch OpenBLAS +# if: matrix.blas == 'ON' +# run: | +# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }} +# 7z x blas.zip -oblas -y +# copy blas/include/cblas.h . +# copy blas/include/openblas_config.h . +# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV +# +# - name: Configure +# run: > +# cmake -S . -B ./build -A ${{ matrix.arch }} +# -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }} +# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib" +# +# - name: Build +# run: | +# cd ./build +# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} +# +# - name: Copy libopenblas.dll +# if: matrix.blas == 'ON' +# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }} +# +# - name: Upload binaries +# if: matrix.blas == 'ON' +# uses: actions/upload-artifact@v1 +# with: +# name: llama-blas-bin-${{ matrix.arch }} +# path: build/bin/${{ matrix.build }} +# +# emscripten: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v1 +# +# - name: Dependencies +# run: | +# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz +# tar -xvf master.tar.gz +# emsdk-master/emsdk update +# emsdk-master/emsdk install latest +# emsdk-master/emsdk activate latest +# +# - name: Configure +# run: echo "tmp" +# +# - name: Build +# run: | +# pushd emsdk-master +# source ./emsdk_env.sh +# popd +# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# make diff --git a/ctransformers/models/submodules/ggllm.cpp/.github/workflows/docker.yml b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/docker.yml new file mode 100644 index 0000000000000000000000000000000000000000..379fbd7ad35f115b1477a7908310335040f9a37d --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/docker.yml @@ -0,0 +1,65 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# GitHub recommends pinning actions to a commit SHA. +# To get a newer version, you will need to update the SHA. +# You can also reference a tag or branch, but the action may change without warning. + +name: Publish Docker image + +on: + pull_request: + push: + branches: + - master + +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + if: github.event.pull_request.draft == false + + runs-on: ubuntu-latest + env: + COMMIT_SHA: ${{ github.sha }} + strategy: + matrix: + config: + - { tag: "light", dockerfile: ".devops/main.Dockerfile" } + - { tag: "full", dockerfile: ".devops/full.Dockerfile" } + steps: + - name: Check out the repo + uses: actions/checkout@v3 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push Docker image (versioned) + if: github.event_name == 'push' + uses: docker/build-push-action@v4 + with: + context: . + push: true + platforms: linux/amd64,linux/arm64 + tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}" + file: ${{ matrix.config.dockerfile }} + + - name: Build and push Docker image (tagged) + uses: docker/build-push-action@v4 + with: + context: . + push: ${{ github.event_name == 'push' }} + platforms: linux/amd64,linux/arm64 + tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}" + file: ${{ matrix.config.dockerfile }} diff --git a/ctransformers/models/submodules/ggllm.cpp/.github/workflows/editorconfig.yml b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/editorconfig.yml new file mode 100644 index 0000000000000000000000000000000000000000..b4e535acf1f647e506105ac71ca1520dc6dad02a --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/editorconfig.yml @@ -0,0 +1,17 @@ +name: EditorConfig Checker + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + editorconfig: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: editorconfig-checker/action-editorconfig-checker@main + - run: editorconfig-checker diff --git a/ctransformers/models/submodules/ggllm.cpp/.github/workflows/tidy-post.yml b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/tidy-post.yml new file mode 100644 index 0000000000000000000000000000000000000000..03652760c80dc5f90caa6faec939fa4870aad4fa --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/tidy-post.yml @@ -0,0 +1,20 @@ +name: clang-tidy review post comments + +on: + workflow_dispatch: + workflows: ["clang-tidy-review"] + types: + - completed + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: ZedThree/clang-tidy-review/post@v0.13.0 + # lgtm_comment_body, max_comments, and annotations need to be set on the posting workflow in a split setup + with: + # adjust options as necessary + lgtm_comment_body: '' + annotations: false + max_comments: 25 diff --git a/ctransformers/models/submodules/ggllm.cpp/.github/workflows/tidy-review.yml b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/tidy-review.yml new file mode 100644 index 0000000000000000000000000000000000000000..a4bc8d976560e59bb9cf0823add738b3a0a2265e --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.github/workflows/tidy-review.yml @@ -0,0 +1,23 @@ +name: clang-tidy-review + +on: + pull_request: + branches: + - master + +jobs: + clang-tidy-review: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - uses: ZedThree/clang-tidy-review@v0.13.0 + id: review + with: + lgtm_comment_body: '' + build_dir: build + cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on + split_workflow: true + + - uses: ZedThree/clang-tidy-review/upload@v0.13.0 diff --git a/ctransformers/models/submodules/ggllm.cpp/.gitignore b/ctransformers/models/submodules/ggllm.cpp/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ecb723bc0ffc0448300cac2c04b45ccc0f7e06e5 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.gitignore @@ -0,0 +1,61 @@ +*.o +*.a +.DS_Store +.build/ +.cache/ +.direnv/ +.envrc +.swiftpm +.venv +.clang-tidy +.vs/ +.vscode/ + +build/ +build-em/ +build-debug/ +build-release/ +build-static/ +build-cublas/ +build-opencl/ +build-metal/ +build-no-accel/ +build-sanitize-addr/ +build-sanitize-thread/ +out/ + +models/* +*.bin + +/main +/quantize +/quantize-stats +/result +/perplexity +/embedding +/train-text-from-scratch +/simple +/benchmark-matmult +/vdot +/server +/Pipfile +/libllama.so + +build-info.h +arm_neon.h +compile_commands.json +CMakeSettings.json + +__pycache__ + +zig-out/ +zig-cache/ + +ppl-*.txt +qnt-*.txt +perf-*.txt + +examples/jeopardy/results.txt +demo_falcon_orig.cpp +.github/workflows/build.yml +.github/workflows/build.yml diff --git a/ctransformers/models/submodules/ggllm.cpp/.pre-commit-config.yaml b/ctransformers/models/submodules/ggllm.cpp/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65796fe2e423b3fd2d89f6f1938da5ac5da6bf0d --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/.pre-commit-config.yaml @@ -0,0 +1,15 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +exclude: prompts/.*.txt +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 diff --git a/ctransformers/models/submodules/ggllm.cpp/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..677ce954336b0f5d15e81c50cb6c5881952a8ac3 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/CMakeLists.txt @@ -0,0 +1,540 @@ +cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason +# If CUDA toolkit is not found using msvc compiler switch to Community Edition (same compiler, just other kit..) +project("ggllm.cpp" C CXX) +add_definitions(-DGGML_PERF=1) # use "--debug-timings 1-3" to enable timing output +include_directories("C:/program files/NVIDIA GPU Computing Toolkit/CUDA/v12.0/include") +include_directories("C:/program files/NVIDIA GPU Computing Toolkit/CUDA/v12.0/lib/x64") +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(LLAMA_STANDALONE ON) + + # configure project version + # TODO +else() + set(LLAMA_STANDALONE OFF) +endif() + +if (EMSCRIPTEN) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + + option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON) +else() + if (MINGW) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + else() + set(BUILD_SHARED_LIBS_DEFAULT ON) + endif() +endif() + + +# +# Option list +# + +# general +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +# instruction set specific +option(LLAMA_AVX "llama: enable AVX" ON) +option(LLAMA_AVX2 "llama: enable AVX2" ON) +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" ON) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" ON) +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +option(LLAMA_CUBLAS "llama: use cuBLAS" ON) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels") +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_METAL "llama: use Metal" OFF) +option(LLAMA_K_QUANTS "llama: use k-quants" ON) + +option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) + +# +# Build info header +# + +# Generate initial build-info.h +include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) + +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.git") + + # Is git submodule + if(NOT IS_DIRECTORY "${GIT_DIR}") + file(READ ${GIT_DIR} REAL_GIT_DIR_LINK) + string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK}) + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${REAL_GIT_DIR}") + endif() + + # Add a custom target for build-info.h + add_custom_target(BUILD_INFO ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h") + + # Add a custom command to rebuild build-info.h when .git/index changes + add_custom_command( + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h" + COMMENT "Generating build details from Git" + COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS "${GIT_DIR}/index" + VERBATIM + ) +else() + message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.") +endif() + +# +# Compile flags +# + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED true) +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED true) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) + +if (NOT MSVC) + if (LLAMA_SANITIZE_THREAD) + add_compile_options(-fsanitize=thread) + link_libraries(-fsanitize=thread) + endif() + + if (LLAMA_SANITIZE_ADDRESS) + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) + link_libraries(-fsanitize=address) + endif() + + if (LLAMA_SANITIZE_UNDEFINED) + add_compile_options(-fsanitize=undefined) + link_libraries(-fsanitize=undefined) + endif() +endif() + +if (APPLE AND LLAMA_ACCELERATE) + find_library(ACCELERATE_FRAMEWORK Accelerate) + if (ACCELERATE_FRAMEWORK) + message(STATUS "Accelerate framework found") + + add_compile_definitions(GGML_USE_ACCELERATE) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) + else() + message(WARNING "Accelerate framework not found") + endif() +endif() + +if (LLAMA_BLAS) + if (LLAMA_STATIC) + set(BLA_STATIC ON) + endif() + if ($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22) + set(BLA_SIZEOF_INTEGER 8) + endif() + + set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) + find_package(BLAS) + + if (BLAS_FOUND) + message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") + + if ("${BLAS_INCLUDE_DIRS}" STREQUAL "") + # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake. + # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268 + find_package(PkgConfig REQUIRED) + if (${LLAMA_BLAS_VENDOR} MATCHES "Generic") + pkg_check_modules(DepBLAS REQUIRED blas) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "OpenBLAS") + pkg_check_modules(DepBLAS REQUIRED openblas) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "FLAME") + pkg_check_modules(DepBLAS REQUIRED blis) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "ATLAS") + pkg_check_modules(DepBLAS REQUIRED blas-atlas) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "FlexiBLAS") + pkg_check_modules(DepBLAS REQUIRED flexiblas_api) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel") + # all Intel* libraries share the same include path + pkg_check_modules(DepBLAS REQUIRED mkl-sdl) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC") + # this doesn't provide pkg-config + # suggest to assign BLAS_INCLUDE_DIRS on your own + if ("${NVHPC_VERSION}" STREQUAL "") + message(WARNING "Better to set NVHPC_VERSION") + else() + set(DepBLAS_FOUND ON) + set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include") + endif() + endif() + if (DepBLAS_FOUND) + set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS}) + else() + message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically" + " detected by pkgconfig, trying to find cblas.h from possible paths...") + find_path(BLAS_INCLUDE_DIRS + NAMES cblas.h + HINTS + /usr/include + /usr/local/include + /usr/include/openblas + /opt/homebrew/opt/openblas/include + /usr/local/opt/openblas/include + /usr/include/x86_64-linux-gnu/openblas/include + ) + endif() + endif() + + message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") + add_compile_options(${BLAS_LINKER_FLAGS}) + add_compile_definitions(GGML_USE_OPENBLAS) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) + set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS}) + + else() + message(WARNING "BLAS not found, please refer to " + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct LLAMA_BLAS_VENDOR") + endif() +endif() + +if (LLAMA_CUBLAS) + cmake_minimum_required(VERSION 3.17) + + find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) + message(STATUS "cuBLAS found") + + enable_language(CUDA) + message(STATUS "CUDA found, version: ${CUDAToolkit_VERSION}") + + set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h) + + add_compile_definitions(GGML_USE_CUBLAS) + add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) + add_compile_definitions(GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y}) + add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) + + if (LLAMA_STATIC) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + else() + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) + endif() + + else() + message(WARNING "cuBLAS not found") + endif() +endif() + +if (LLAMA_METAL) + find_library(FOUNDATION_LIBRARY Foundation REQUIRED) + find_library(METAL_FRAMEWORK Metal REQUIRED) + find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) + find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED) + + set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h) + + add_compile_definitions(GGML_USE_METAL) + add_compile_definitions(GGML_METAL_NDEBUG) + + # get full path to the file + #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/") + + # copy ggml-metal.metal to bin directory + configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY) + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} + ${FOUNDATION_LIBRARY} + ${METAL_FRAMEWORK} + ${METALKIT_FRAMEWORK} + ${METALPERFORMANCE_FRAMEWORK} + ) +endif() + +if (LLAMA_K_QUANTS) + set(GGML_SOURCES_EXTRA ${GGML_SOURCES_EXTRA} k_quants.c k_quants.h) + add_compile_definitions(GGML_USE_K_QUANTS) +endif() + +if (LLAMA_CLBLAST) + find_package(CLBlast) + if (CLBlast_FOUND) + message(STATUS "CLBlast found") + + set(GGML_SOURCES_OPENCL ggml-opencl.cpp ggml-opencl.h) + + add_compile_definitions(GGML_USE_CLBLAST) + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast) + else() + message(WARNING "CLBlast not found") + endif() +endif() + +if (LLAMA_ALL_WARNINGS) + if (NOT MSVC) + set(c_flags + -Wall + -Wextra + -Wpedantic + -Wcast-qual + -Wdouble-promotion + -Wshadow + -Wstrict-prototypes + -Wpointer-arith + ) + set(cxx_flags + -Wall + -Wextra + -Wpedantic + -Wcast-qual + -Wno-unused-function + -Wno-multichar + ) + else() + # todo : msvc + endif() + + add_compile_options( + "$<$:${c_flags}>" + "$<$:${cxx_flags}>" + ) + +endif() + +if (MSVC) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + + if (BUILD_SHARED_LIBS) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + endif() +endif() + +if (LLAMA_LTO) + include(CheckIPOSupported) + check_ipo_supported(RESULT result OUTPUT output) + if (result) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(WARNING "IPO is not supported: ${output}") + endif() +endif() + +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +if (NOT MSVC) + if (LLAMA_STATIC) + add_link_options(-static) + if (MINGW) + add_link_options(-static-libgcc -static-libstdc++) + endif() + endif() + if (LLAMA_GPROF) + add_compile_options(-pg) + endif() + if (LLAMA_NATIVE) + add_compile_options(-march=native) + endif() +endif() + +if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + message(STATUS "ARM detected") + if (MSVC) + # TODO: arm msvc? + else() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + # Apple M1, M2, etc. + # Raspberry Pi 3, 4, Zero 2 (64-bit) + add_compile_options(-mcpu=native) + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") + # Raspberry Pi 1, Zero + add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access) + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") + # Raspberry Pi 2 + add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations) + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") + # Raspberry Pi 3, 4, Zero 2 (32-bit) + add_compile_options(-mfp16-format=ieee -mno-unaligned-access) + endif() + endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") + message(STATUS "x86 detected") + if (MSVC) + if (LLAMA_AVX512) + add_compile_options($<$:/arch:AVX512>) + add_compile_options($<$:/arch:AVX512>) + # MSVC has no compile-time flags enabling specific + # AVX512 extensions, neither it defines the + # macros corresponding to the extensions. + # Do it manually. + if (LLAMA_AVX512_VBMI) + add_compile_definitions($<$:__AVX512VBMI__>) + add_compile_definitions($<$:__AVX512VBMI__>) + endif() + if (LLAMA_AVX512_VNNI) + add_compile_definitions($<$:__AVX512VNNI__>) + add_compile_definitions($<$:__AVX512VNNI__>) + endif() + elseif (LLAMA_AVX2) + add_compile_options($<$:/arch:AVX2>) + add_compile_options($<$:/arch:AVX2>) + elseif (LLAMA_AVX) + add_compile_options($<$:/arch:AVX>) + add_compile_options($<$:/arch:AVX>) + endif() + else() + if (LLAMA_F16C) + add_compile_options(-mf16c) + endif() + if (LLAMA_FMA) + add_compile_options(-mfma) + endif() + if (LLAMA_AVX) + add_compile_options(-mavx) + endif() + if (LLAMA_AVX2) + add_compile_options(-mavx2) + endif() + if (LLAMA_AVX512) + add_compile_options(-mavx512f) + add_compile_options(-mavx512bw) + endif() + if (LLAMA_AVX512_VBMI) + add_compile_options(-mavx512vbmi) + endif() + if (LLAMA_AVX512_VNNI) + add_compile_options(-mavx512vnni) + endif() + endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64") + message(STATUS "PowerPC detected") + add_compile_options(-mcpu=native -mtune=native) + #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be) +else() + message(STATUS "Unknown architecture") +endif() + +# +# Build libraries +# + +add_library(ggml OBJECT + ggml.c + ggml.h + ${GGML_SOURCES_CUDA} + ${GGML_SOURCES_OPENCL} + ${GGML_SOURCES_METAL} + ${GGML_SOURCES_EXTRA} + ) + +target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES}) +target_compile_features(ggml PUBLIC c_std_11) # don't bump +target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) + +add_library(ggml_static STATIC $) +if (BUILD_SHARED_LIBS) + set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) + add_library(ggml_shared SHARED $) +endif() + +add_library(llama + llama.cpp + llama.h + llama-util.h + ) + +target_include_directories(llama PUBLIC .) +target_compile_features(llama PUBLIC cxx_std_11) # don't bump +target_link_libraries(llama PRIVATE + ggml + ${LLAMA_EXTRA_LIBS} + ) + +# falcon +add_library(libfalcon + libfalcon.cpp + libfalcon.h + llama-util.h + ) +target_include_directories(libfalcon PUBLIC .) +target_compile_features(libfalcon PUBLIC cxx_std_11) # don't bump +target_link_libraries(libfalcon PRIVATE + ggml + ${LLAMA_EXTRA_LIBS} + ) +# + +if (BUILD_SHARED_LIBS) + set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD) + if (LLAMA_METAL) + set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") + endif() + + # falcon + set_target_properties(libfalcon PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(libfalcon PRIVATE LLAMA_SHARED LLAMA_BUILD) + if (LLAMA_METAL) + set_target_properties(libfalcon PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") + endif() + # +endif() + +if (GGML_SOURCES_CUDA) + message(STATUS "GGML CUDA sources found, configuring CUDA architecture") + set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF) + set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto") + set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF) + # falcon + set_property(TARGET libfalcon PROPERTY CUDA_ARCHITECTURES OFF) + +endif() + + +# +# programs, examples and tests +# + +if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) + include(CTest) + add_subdirectory(tests) +endif () + +if (LLAMA_BUILD_EXAMPLES) + add_subdirectory(examples) + add_subdirectory(pocs) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/LICENSE b/ctransformers/models/submodules/ggllm.cpp/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..76f67efdc6470081b512a8db5bf2b1d4962d9c3c --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Georgi Gerganov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ctransformers/models/submodules/ggllm.cpp/Makefile b/ctransformers/models/submodules/ggllm.cpp/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..32389ee1ef046e94c462327b9af140d0f62f6bb6 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/Makefile @@ -0,0 +1,336 @@ +# Define the default target now so that it is always the first target +BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple + +ifdef LLAMA_BUILD_SERVER + BUILD_TARGETS += server + LLAMA_SERVER_VERBOSE ?= 1 +server: private CXXFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE) +endif + +default: $(BUILD_TARGETS) + +ifndef UNAME_S +UNAME_S := $(shell uname -s) +endif + +ifndef UNAME_P +UNAME_P := $(shell uname -p) +endif + +ifndef UNAME_M +UNAME_M := $(shell uname -m) +endif + +CCV := $(shell $(CC) --version | head -n 1) +CXXV := $(shell $(CXX) --version | head -n 1) + +# Mac OS + Arm can report x86_64 +# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 +ifeq ($(UNAME_S),Darwin) + ifneq ($(UNAME_P),arm) + SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null) + ifeq ($(SYSCTL_M),1) + # UNAME_P := arm + # UNAME_M := arm64 + warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789) + endif + endif +endif + +# +# Compile flags +# + +# keep standard at C11 and C++11 +# -Ofast tends to produce faster code, but may not be available for some compilers. +#OPT = -Ofast +OPT = -O3 +CFLAGS = -I. $(OPT) -std=c11 -fPIC +CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC +LDFLAGS = + +ifdef LLAMA_DEBUG + CFLAGS += -O0 -g + CXXFLAGS += -O0 -g + LDFLAGS += -g +else + CFLAGS += -DNDEBUG + CXXFLAGS += -DNDEBUG +endif + +# warnings +CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith +CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar + +# OS specific +# TODO: support Windows +ifeq ($(UNAME_S),Linux) + CFLAGS += -pthread + CXXFLAGS += -pthread +endif +ifeq ($(UNAME_S),Darwin) + CFLAGS += -pthread + CXXFLAGS += -pthread +endif +ifeq ($(UNAME_S),FreeBSD) + CFLAGS += -pthread + CXXFLAGS += -pthread +endif +ifeq ($(UNAME_S),NetBSD) + CFLAGS += -pthread + CXXFLAGS += -pthread +endif +ifeq ($(UNAME_S),OpenBSD) + CFLAGS += -pthread + CXXFLAGS += -pthread +endif +ifeq ($(UNAME_S),Haiku) + CFLAGS += -pthread + CXXFLAGS += -pthread +endif + +ifdef LLAMA_GPROF + CFLAGS += -pg + CXXFLAGS += -pg +endif +ifdef LLAMA_PERF + CFLAGS += -DGGML_PERF + CXXFLAGS += -DGGML_PERF +endif + +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue +ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) + # Use all CPU extensions that are available: + CFLAGS += -march=native -mtune=native + CXXFLAGS += -march=native -mtune=native + + # Usage AVX-only + #CFLAGS += -mfma -mf16c -mavx + #CXXFLAGS += -mfma -mf16c -mavx + + # Usage SSSE3-only (Not is SSE3!) + #CFLAGS += -mssse3 + #CXXFLAGS += -mssse3 +endif + +ifneq ($(filter ppc64%,$(UNAME_M)),) + POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) + ifneq (,$(findstring POWER9,$(POWER9_M))) + CFLAGS += -mcpu=power9 + CXXFLAGS += -mcpu=power9 + endif + # Require c++23's std::byteswap for big-endian support. + ifeq ($(UNAME_M),ppc64) + CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN + endif +endif + +ifndef LLAMA_NO_K_QUANTS + CFLAGS += -DGGML_USE_K_QUANTS + CXXFLAGS += -DGGML_USE_K_QUANTS + OBJS += k_quants.o +endif + +ifndef LLAMA_NO_ACCELERATE + # Mac M1 - include Accelerate framework. + # `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time). + ifeq ($(UNAME_S),Darwin) + CFLAGS += -DGGML_USE_ACCELERATE + LDFLAGS += -framework Accelerate + endif +endif # LLAMA_NO_ACCELERATE + +ifdef LLAMA_OPENBLAS + CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -I/usr/include/openblas + LDFLAGS += -lopenblas +endif # LLAMA_OPENBLAS + +ifdef LLAMA_BLIS + CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis + LDFLAGS += -lblis -L/usr/local/lib +endif # LLAMA_BLIS + +ifdef LLAMA_CUBLAS + CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include + CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include + LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib + OBJS += ggml-cuda.o + NVCC = nvcc + NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native +ifdef LLAMA_CUDA_DMMV_X + NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) +else + NVCCFLAGS += -DGGML_CUDA_DMMV_X=32 +endif # LLAMA_CUDA_DMMV_X +ifdef LLAMA_CUDA_DMMV_Y + NVCCFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y) +else + NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1 +endif # LLAMA_CUDA_DMMV_Y +ifdef LLAMA_CUDA_KQUANTS_ITER + NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) +else + NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2 +endif +ggml-cuda.o: ggml-cuda.cu ggml-cuda.h + $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@ +endif # LLAMA_CUBLAS + +ifdef LLAMA_CLBLAST + CFLAGS += -DGGML_USE_CLBLAST + CXXFLAGS += -DGGML_USE_CLBLAST + # Mac provides OpenCL as a framework + ifeq ($(UNAME_S),Darwin) + LDFLAGS += -lclblast -framework OpenCL + else + LDFLAGS += -lclblast -lOpenCL + endif + OBJS += ggml-opencl.o + +ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h + $(CXX) $(CXXFLAGS) -c $< -o $@ +endif # LLAMA_CLBLAST + +ifdef LLAMA_METAL + CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG + CXXFLAGS += -DGGML_USE_METAL + LDFLAGS += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders + OBJS += ggml-metal.o + +ggml-metal.o: ggml-metal.m ggml-metal.h + $(CC) $(CFLAGS) -c $< -o $@ +endif # LLAMA_METAL + +ifneq ($(filter aarch64%,$(UNAME_M)),) + # Apple M1, M2, etc. + # Raspberry Pi 3, 4, Zero 2 (64-bit) + CFLAGS += -mcpu=native + CXXFLAGS += -mcpu=native +endif + +ifneq ($(filter armv6%,$(UNAME_M)),) + # Raspberry Pi 1, Zero + CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access +endif + +ifneq ($(filter armv7%,$(UNAME_M)),) + # Raspberry Pi 2 + CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations +endif + +ifneq ($(filter armv8%,$(UNAME_M)),) + # Raspberry Pi 3, 4, Zero 2 (32-bit) + CFLAGS += -mfp16-format=ieee -mno-unaligned-access +endif + +ifdef LLAMA_NO_K_QUANTS +k_quants.o: k_quants.c k_quants.h + $(CC) $(CFLAGS) -c $< -o $@ +endif # LLAMA_NO_K_QUANTS + +# +# Print build information +# + +$(info I llama.cpp build info: ) +$(info I UNAME_S: $(UNAME_S)) +$(info I UNAME_P: $(UNAME_P)) +$(info I UNAME_M: $(UNAME_M)) +$(info I CFLAGS: $(CFLAGS)) +$(info I CXXFLAGS: $(CXXFLAGS)) +$(info I LDFLAGS: $(LDFLAGS)) +$(info I CC: $(CCV)) +$(info I CXX: $(CXXV)) +$(info ) + +# +# Build library +# + +ggml.o: ggml.c ggml.h ggml-cuda.h + $(CC) $(CFLAGS) -c $< -o $@ + +llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +libfalcon.o: libfalcon.cpp ggml.h ggml-cuda.h libfalcon.h llama-util.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +common.o: examples/common.cpp examples/common.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +falcon_common.o: examples/falcon_common.cpp examples/falcon_common.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +libllama.so: llama.o ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) + +clean: + rm -vf *.o *.so main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server vdot train-text-from-scratch build-info.h + +# +# Examples +# + +main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + @echo + @echo '==== Run ./main -h for help. ====' + @echo + +simple: examples/simple/simple.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +falcon_quantize: examples/falcon_quantize/quantize.cpp build-info.h ggml.o libfalcon.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +falcon_perplexity: examples/falcon_perplexity/falcon_perplexity.cpp build-info.h ggml.o libfalcon.o falcon_common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) + +train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + +build-info.h: $(wildcard .git/index) scripts/build-info.sh + @sh scripts/build-info.sh > $@.tmp + @if ! cmp -s $@.tmp $@; then \ + mv $@.tmp $@; \ + else \ + rm $@.tmp; \ + fi + +falcon_main: examples/falcon/falcon_main.cpp build-info.h ggml.o libfalcon.o falcon_common.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) +# +# Tests +# + +benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + ./$@ + +vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) + +.PHONY: tests clean +tests: + bash ./tests/run-tests.sh diff --git a/ctransformers/models/submodules/ggllm.cpp/Package.swift b/ctransformers/models/submodules/ggllm.cpp/Package.swift new file mode 100644 index 0000000000000000000000000000000000000000..73d027c70215495a5b5233b95110a2c3f622d46c --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/Package.swift @@ -0,0 +1,24 @@ +// swift-tools-version:5.3 + +import PackageDescription + +let package = Package( + name: "llama", + products: [ + .library(name: "llama", targets: ["llama"]), + ], + targets: [ + .target( + name: "llama", + path: ".", + exclude: ["ggml-metal.metal"], + sources: ["ggml.c", "llama.cpp"], + publicHeadersPath: "spm-headers", + cSettings: [.unsafeFlags(["-Wno-shorten-64-to-32"]), .define("GGML_USE_ACCELERATE")], + linkerSettings: [ + .linkedFramework("Accelerate") + ] + ), + ], + cxxLanguageStandard: .cxx11 +) diff --git a/ctransformers/models/submodules/ggllm.cpp/README.md b/ctransformers/models/submodules/ggllm.cpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7101700af82ba5cb29c7ae12af3e32f8a60a8867 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/README.md @@ -0,0 +1,137 @@ +llama.cpp modification to run Falcon (work in progress) + +**The Bloke features fine tuned weights in ggml v3 with various quantization options:** +https://huggingface.co/TheBloke/falcon-40b-instruct-GGML +https://huggingface.co/TheBloke/WizardLM-Uncensored-Falcon-40B-GGML +https://huggingface.co/TheBloke/falcon-7b-instruct-GGML +https://huggingface.co/TheBloke/WizardLM-Uncensored-Falcon-7B-GGML + +**The official HF models are here:** +https://huggingface.co/tiiuae/falcon-40b/ +https://huggingface.co/tiiuae/falcon-7b/ +https://huggingface.co/tiiuae/falcon-40b-instruct +https://huggingface.co/tiiuae/falcon-7b-instruct + +**Conversion:** +1) use falcon_convert.py to produce a GGML v1 binary from HF - not recommended to be used directly +2) use examples/falcon_quantize to convert these into memory aligned GGMLv3 binaries of your choice including mmap support from there on +_Important: The Falcon 7B model features tensor sizes which are not yet supported by K-type quantizers - use the traditional quantization for those_ + +**Status/Bugs:** +* On linux Q5_1 7B user reports a batch token ingestion context memory issue, with -b 1 it's gone. Not reproduced on Windows + +**How to compile:** +``` +How to build: +1) Recommended with cmake: (change the CUBLAS flag to 0 to disable CUDA requirements and support) +git clone +cd ggllm.cpp +rm -rf build; mkdir build; cd build +cmake -DLLAMA_CUBLAS=1 .. +cmake --build . --config Release +# find binaries in ./bin + + +2) Installing on WSL (Windows Subsystem for Linux) +# I am getting slightly better timings on WSL than native windows +# Use --no-mmap in WSL OR copy the model into native directory (not /mnt/) or it will get stuck loading (thanks @nauful) +#Choose a current distro: +wsl.exe --list --online +wsl --install -d distro +# cmake 3.16 is required and the cuda toolset +# If you run an old distro you can upgrade (like apt update; apt upgrade; apt full-upgrade; pico /etc/apt/sources.list/; apt update; apt upgrade; apt full-upgrade; apt autoremove; lsb_release -a); then wsl --shutdown and restart it +# install cuda WSL toolkit +wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.0-1_all.deb +dpkg -i cuda-keyring_1.0-1_all.deb +apt-get update; apt-get -y install cuda +# you might need to add it to your path: +export LD_LIBRARY_PATH="/usr/local/cuda-12.1/lib64:$LD_LIBRARY_PATH" +export PATH="/usr/local/cuda-12.1/bin:$PATH" +# now start with a fresh cmake and all should work +``` + +**CUDA:** +Only some tensors supported currently, only mul_mat operation supported currently +q3_k timing on 3090 of Falcon 40B: +falcon_print_timings: prompt eval time = 702.55 ms / 3 tokens ( 234.18 ms per token) +falcon_print_timings: eval time = 3350.65 ms / 24 runs ( 139.61 ms per token) + +q4_k timing on 3090 of falcon 40B (partial offload): +falcon_print_timings: prompt eval time = 590.82 ms / 3 tokens ( 196.94 ms per token) +falcon_print_timings: eval time = 2817.37 ms / 24 runs ( 117.39 ms per token) + +q4_1 timing on 3090 of falcon 7B: +falcon_print_timings: prompt eval time = 115.30 ms / 3 tokens ( 38.43 ms per token) +falcon_print_timings: eval time = 5926.74 ms / 147 runs ( 40.32 ms per token) + + +CUDA sidenote: +1) use 1 less threads than you have physical processor cores +2) If it's too slow and GPU memory is at 100% then the automated tensor skip is not working properly, reduce --ngl until gpu memory does not saturate fully at first inference + + +It appears the Q5 Falcon 40B inference time on CPU is as fast as the A100 fp16 inference time at 2 tk/second +CPU inference examples: +``` + Q:\ggllm.cpp> .\build\bin\Release\falcon_main.exe -t 31 -m Q:\models\falcon-40b\q5_1 -p "Love relates to hate like" -n 50 -ngl 0 +main: build = 677 (dd3d346) +main: seed = 1687010794 +ggml_init_cublas: found 1 CUDA devices: + Device 0: NVIDIA GeForce RTX 3090 +falcon.cpp: loading model from Q:\models\falcon-40b\q5_1 +falcon_model_load_internal: format = ggjt v3 (latest) +falcon_model_load_internal: n_vocab = 65024 +falcon_model_load_internal: n_ctx = 512 +falcon_model_load_internal: n_embd = 8192 +falcon_model_load_internal: n_head = 128 +falcon_model_load_internal: n_head_kv = 8 +falcon_model_load_internal: n_layer = 60 +falcon_model_load_internal: version = 40 +falcon_model_load_internal: ftype = 9 (mostly Q5_1) +falcon_model_load_internal: n_ff = 32768 +falcon_model_load_internal: n_parts = 1 +falcon_model_load_internal: model size = 40B +falcon_model_load_internal: ggml ctx size = 0.00 MB (mmap size = 29929.00 MB) +falcon_model_load_internal: using CUDA for GPU acceleration +falcon_model_load_internal: mem required = 33513.70 MB (+ 120.00 MB per state) +falcon_model_load_internal: offloading 0 layers to GPU +falcon_model_load_internal: total VRAM used: 512 MB +................................................................................................... +falcon_init_from_file: kv self size = 120.00 MB + +system_info: n_threads = 31 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | VSX = 0 | +sampling: repeat_last_n = 64, repeat_penalty = 1.100000, presence_penalty = 0.000000, frequency_penalty = 0.000000, top_k = 40, tfs_z = 1.000000, top_p = 0.950000, typical_p = 1.000000, temp = 0.800000, mirostat = 0, mirostat_lr = 0.100000, mirostat_ent = 5.000000 +generate: n_ctx = 512, n_batch = 512, n_predict = 50, n_keep = 0 + + +Love relates to hate like light relates to darkness. +Love is the strongest thing in the world, but hate is the second strongest force. +Love is a force multiplier. +For every moment of love, there is a parallel moment of hate. +You can’t +falcon_print_timings: load time = 4420.23 ms +falcon_print_timings: sample time = 11.34 ms / 50 runs ( 0.23 ms per token) +falcon_print_timings: prompt eval time = 785.42 ms / 5 tokens ( 157.08 ms per token) +falcon_print_timings: eval time = 27512.23 ms / 49 runs ( 561.47 ms per token) +falcon_print_timings: total time = 28315.91 ms +``` + + +Below are Falcon 7B tests: +**Q5_1 is working, comes with ggml v3 as a bonus (mmap support)** +``` +falcon_model_load_internal: ftype = 9 (mostly Q5_1) +falcon_print_timings: load time = 952.24 ms +falcon_print_timings: sample time = 67.91 ms / 300 runs ( 0.23 ms per token) +falcon_print_timings: prompt eval time = 370.94 ms / 14 tokens ( 26.50 ms per token) +falcon_print_timings: eval time = 50367.68 ms / 299 runs ( 168.45 ms per token) +``` +**Q4_1 is working as well** +``` +falcon_print_timings: load time = 864.40 ms +falcon_print_timings: sample time = 22.68 ms / 100 runs ( 0.23 ms per token) +falcon_print_timings: prompt eval time = 287.00 ms / 14 tokens ( 20.50 ms per token) +falcon_print_timings: eval time = 12233.39 ms / 99 runs ( 123.57 ms per token) +``` + +Q_K_*: not working (no segfaults anymore, looks like an error in qkv handling as it's outputting garbage. diff --git a/ctransformers/models/submodules/ggllm.cpp/SHA256SUMS b/ctransformers/models/submodules/ggllm.cpp/SHA256SUMS new file mode 100644 index 0000000000000000000000000000000000000000..ca4d5a4a53531ebe048305f4ac503b5d6fff4b2a --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/SHA256SUMS @@ -0,0 +1,40 @@ +700df0d3013b703a806d2ae7f1bfb8e59814e3d06ae78be0c66368a50059f33d models/7B/consolidated.00.pth +666a4bb533b303bdaf89e1b6a3b6f93535d868de31d903afdc20983dc526c847 models/7B/ggml-model-f16.bin +ec2f2d1f0dfb73b72a4cbac7fa121abbe04c37ab327125a38248f930c0f09ddf models/7B/ggml-model-q4_0.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q4_1.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q5_0.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/7B/ggml-model-q5_1.bin +7e89e242ddc0dd6f060b43ca219ce8b3e8f08959a72cb3c0855df8bb04d46265 models/7B/params.json +745bf4e29a4dd6f411e72976d92b452da1b49168a4f41c951cfcc8051823cf08 models/13B/consolidated.00.pth +d5ccbcc465c71c0de439a5aeffebe8344c68a519bce70bc7f9f92654ee567085 models/13B/consolidated.01.pth +2b206e9b21fb1076f11cafc624e2af97c9e48ea09312a0962153acc20d45f808 models/13B/ggml-model-f16.bin +fad169e6f0f575402cf75945961cb4a8ecd824ba4da6be2af831f320c4348fa5 models/13B/ggml-model-q4_0.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q4_1.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q5_0.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/13B/ggml-model-q5_1.bin +4ab77bec4d4405ccb66a97b282574c89a94417e3c32e5f68f37e2876fc21322f models/13B/params.json +e23294a58552d8cdec5b7e8abb87993b97ea6eced4178ff2697c02472539d067 models/30B/consolidated.00.pth +4e077b7136c7ae2302e954860cf64930458d3076fcde9443f4d0e939e95903ff models/30B/consolidated.01.pth +24a87f01028cbd3a12de551dcedb712346c0b5cbdeff1454e0ddf2df9b675378 models/30B/consolidated.02.pth +1adfcef71420886119544949767f6a56cb6339b4d5fcde755d80fe68b49de93b models/30B/consolidated.03.pth +7e1b524061a9f4b27c22a12d6d2a5bf13b8ebbea73e99f218809351ed9cf7d37 models/30B/ggml-model-f16.bin +d2a441403944819492ec8c2002cc36fa38468149bfb4b7b4c52afc7bd9a7166d models/30B/ggml-model-q4_0.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q4_1.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q5_0.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/30B/ggml-model-q5_1.bin +2c07118ea98d69dbe7810d88520e30288fa994751b337f8fca02b171955f44cb models/30B/params.json +135c563f6b3938114458183afb01adc9a63bef3d8ff7cccc3977e5d3664ecafe models/65B/consolidated.00.pth +9a600b37b19d38c7e43809485f70d17d1dc12206c07efa83bc72bb498a568bde models/65B/consolidated.01.pth +e7babf7c5606f165a3756f527cb0fedc4f83e67ef1290391e52fb1cce5f26770 models/65B/consolidated.02.pth +73176ffb426b40482f2aa67ae1217ef79fbbd1fff5482bae5060cdc5a24ab70e models/65B/consolidated.03.pth +882e6431d0b08a8bc66261a0d3607da21cbaeafa96a24e7e59777632dbdac225 models/65B/consolidated.04.pth +a287c0dfe49081626567c7fe87f74cce5831f58e459b427b5e05567641f47b78 models/65B/consolidated.05.pth +72b4eba67a1a3b18cb67a85b70f8f1640caae9b40033ea943fb166bd80a7b36b models/65B/consolidated.06.pth +d27f5b0677d7ff129ceacd73fd461c4d06910ad7787cf217b249948c3f3bc638 models/65B/consolidated.07.pth +60758f2384d74e423dffddfd020ffed9d3bb186ebc54506f9c4a787d0f5367b0 models/65B/ggml-model-f16.bin +cde053439fa4910ae454407e2717cc46cc2c2b4995c00c93297a2b52e790fa92 models/65B/ggml-model-q4_0.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q4_1.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q5_0.bin +ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff models/65B/ggml-model-q5_1.bin +999ed1659b469ccc2a941714c0a9656fa571d17c9f7c8c7589817ca90edef51b models/65B/params.json +9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 models/tokenizer.model diff --git a/ctransformers/models/submodules/ggllm.cpp/build.zig b/ctransformers/models/submodules/ggllm.cpp/build.zig new file mode 100644 index 0000000000000000000000000000000000000000..306127ffe2a73f604acfad8615f0c833aec13b69 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/build.zig @@ -0,0 +1,61 @@ +const std = @import("std"); + +pub fn build(b: *std.build.Builder) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardReleaseOptions(); + const want_lto = b.option(bool, "lto", "Want -fLTO"); + + const lib = b.addStaticLibrary("llama", null); + lib.want_lto = want_lto; + lib.setTarget(target); + lib.setBuildMode(optimize); + lib.linkLibCpp(); + lib.addIncludePath("."); + lib.addIncludePath("examples"); + lib.addCSourceFiles(&.{ + "ggml.c", + }, &.{"-std=c11"}); + lib.addCSourceFiles(&.{ + "llama.cpp", + }, &.{"-std=c++11"}); + lib.install(); + + const build_args = .{ .b = b, .lib = lib, .target = target, .optimize = optimize, .want_lto = want_lto }; + + const exe = build_example("main", build_args); + _ = build_example("quantize", build_args); + _ = build_example("perplexity", build_args); + _ = build_example("embedding", build_args); + + // create "zig build run" command for ./main + + const run_cmd = exe.run(); + run_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + run_cmd.addArgs(args); + } + + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); +} + +fn build_example(comptime name: []const u8, args: anytype) *std.build.LibExeObjStep { + const b = args.b; + const lib = args.lib; + const want_lto = args.want_lto; + + const exe = b.addExecutable(name, null); + exe.want_lto = want_lto; + lib.setTarget(args.target); + lib.setBuildMode(args.optimize); + exe.addIncludePath("."); + exe.addIncludePath("examples"); + exe.addCSourceFiles(&.{ + std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{name, name}), + "examples/common.cpp", + }, &.{"-std=c++11"}); + exe.linkLibrary(lib); + exe.install(); + + return exe; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/convert-lora-to-ggml.py b/ctransformers/models/submodules/ggllm.cpp/convert-lora-to-ggml.py new file mode 100644 index 0000000000000000000000000000000000000000..9090e8d6dd55a76c3816f9985e9c036a195a6210 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/convert-lora-to-ggml.py @@ -0,0 +1,129 @@ +import json +import os +import re +import struct +import sys +from typing import Any, Dict, Sequence, TextIO + +import torch + +from convert import DATA_TYPE_TO_FTYPE, NUMPY_TYPE_TO_DATA_TYPE, DataType + +HF_SUBLAYER_TO_GGML = { + "self_attn.q_proj": "attention.wq", + "self_attn.k_proj": "attention.wk", + "self_attn.v_proj": "attention.wv", + "self_attn.o_proj": "attention.wo", + "mlp.gate_proj": "feed_forward.w1", + "mlp.down_proj": "feed_forward.w2", + "mlp.up_proj": "feed_forward.w3", + "input_layernorm": "attention_norm", + "post_attention_layernorm": "ffn_norm", + # "norm": "norm", + # "embed_tokens": "tok_embeddings", + # "lm_head": "output", +} + + +def translate_tensor_name(t: str) -> str: + match = re.match(r".*layers\.(\d+)\.(\w+\.\w+)\.lora_(A|B)\.weight", t) + if match: + nn = match.group(1) + sub_layer = match.group(2) + lora_type = match.group(3) + + sub_layer_renamed = HF_SUBLAYER_TO_GGML.get(sub_layer) + if sub_layer_renamed is None: + print(f"Error: unrecognized sub-layer {sub_layer} in tensor {t}") + sys.exit(1) + + output_string = ( + f"layers.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}" + ) + return output_string + else: + print(f"Error: unrecognized tensor {t}") + sys.exit(1) + + +def write_file_header(fout: TextIO, params: Dict[str, Any]) -> None: + fout.write(b"ggla"[::-1]) # magic (ggml lora) + fout.write(struct.pack("i", 1)) # file version + fout.write(struct.pack("i", params["r"])) + # https://opendelta.readthedocs.io/en/latest/modules/deltas.html says that `lora_alpha` is an int + # but some models ship a float value instead + # let's convert to int, but fail if lossless conversion is not possible + assert int(params["lora_alpha"]) == params["lora_alpha"], "cannot convert float to int losslessly" + fout.write(struct.pack("i", int(params["lora_alpha"]))) + + +def write_tensor_header( + self, name: str, shape: Sequence[int], data_type: DataType +) -> None: + sname = name.encode("utf-8") + fout.write( + struct.pack( + "iii", + len(shape), + len(sname), + DATA_TYPE_TO_FTYPE[NUMPY_TYPE_TO_DATA_TYPE[data_type]], + ) + ) + fout.write(struct.pack("i" * len(shape), *shape[::-1])) + fout.write(sname) + fout.seek((fout.tell() + 31) & -32) + + +if len(sys.argv) != 2: + print(f"Usage: python {sys.argv[0]} ") + print( + "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'" + ) + sys.exit(1) + +input_json = os.path.join(sys.argv[1], "adapter_config.json") +input_model = os.path.join(sys.argv[1], "adapter_model.bin") +output_path = os.path.join(sys.argv[1], "ggml-adapter-model.bin") + +model = torch.load(input_model, map_location="cpu") + +with open(input_json, "r") as f: + params = json.load(f) + +if params["peft_type"] != "LORA": + print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA") + sys.exit(1) + +if params["fan_in_fan_out"] is True: + print("Error: param fan_in_fan_out is not supported") + sys.exit(1) + +if params["bias"] is not None and params["bias"] != "none": + print("Error: param bias is not supported") + sys.exit(1) + +# TODO: these seem to be layers that have been trained but without lora. +# doesn't seem widely used but eventually should be supported +if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0: + print("Error: param modules_to_save is not supported") + sys.exit(1) + +with open(output_path, "wb") as fout: + fout.truncate() + + write_file_header(fout, params) + for k, v in model.items(): + if k.endswith("lora_A.weight"): + if v.dtype != torch.float16 and v.dtype != torch.float32: + v = v.float() + v = v.T + else: + v = v.float() + + t = v.numpy() + tname = translate_tensor_name(k) + print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB") + write_tensor_header(fout, tname, t.shape, t.dtype) + t.tofile(fout) + +print(f"Converted {input_json} and {input_model} to {output_path}") diff --git a/ctransformers/models/submodules/ggllm.cpp/convert-pth-to-ggml.py b/ctransformers/models/submodules/ggllm.cpp/convert-pth-to-ggml.py new file mode 100644 index 0000000000000000000000000000000000000000..dd15393c3fe45d47c2a3737103d4ed02e0a7a50c --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/convert-pth-to-ggml.py @@ -0,0 +1,13 @@ +# Compatibility stub + +import argparse + +import convert + +parser = argparse.ArgumentParser( + description="""[DEPRECATED - use `convert.py` instead] + Convert a LLaMA model checkpoint to a ggml compatible file""") +parser.add_argument('dir_model', help='directory containing the model checkpoint') +parser.add_argument('ftype', help='file type (0: float32, 1: float16)', type=int, choices=[0, 1], default=1) +args = parser.parse_args() +convert.main(['--outtype', 'f16' if args.ftype == 1 else 'f32', '--', args.dir_model]) diff --git a/ctransformers/models/submodules/ggllm.cpp/convert.py b/ctransformers/models/submodules/ggllm.cpp/convert.py new file mode 100644 index 0000000000000000000000000000000000000000..265c41fa04b189669059773abc76f9bd347d2116 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/convert.py @@ -0,0 +1,1178 @@ +import argparse +import concurrent.futures +import copy +import enum +import faulthandler +import functools +import io +import itertools +import json +import math +import mmap +import pickle +import re +import signal +import struct +import sys +import zipfile +from abc import ABCMeta, abstractmethod +from dataclasses import dataclass +from pathlib import Path +from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List, + Literal, Optional, Sequence, Tuple, TypeVar, Union) + +import numpy as np +from sentencepiece import SentencePieceProcessor # type: ignore + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + +if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'): + faulthandler.register(signal.SIGUSR1) + +NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]' + + +@dataclass(frozen=True) +class UnquantizedDataType: + name: str + + +DT_F16 = UnquantizedDataType('F16') +DT_F32 = UnquantizedDataType('F32') +DT_I32 = UnquantizedDataType('I32') +DT_BF16 = UnquantizedDataType('BF16') + + +@dataclass(frozen=True) +class QuantizedDataType: + groupsize: int + have_addends: bool + have_g_idx: bool + + +DT_Q4_0 = QuantizedDataType(groupsize=32, have_addends=False, have_g_idx=False) +DT_Q4_1 = QuantizedDataType(groupsize=32, have_addends=True, have_g_idx=False) + +DataType = Union[UnquantizedDataType, QuantizedDataType] + +DATA_TYPE_TO_FTYPE: Dict[DataType, int] = { + DT_F32: 0, + DT_F16: 1, + DT_Q4_0: 2, + DT_Q4_1: 3, +} + +FTYPE_TO_DATA_TYPE: Dict[int, DataType] = \ + {ftype: dtype for (dtype, ftype) in DATA_TYPE_TO_FTYPE.items()} + +DATA_TYPE_TO_NUMPY: Dict[DataType, 'np.dtype[Any]'] = { + DT_BF16: np.dtype(np.uint16), + DT_F16: np.dtype(np.float16), + DT_F32: np.dtype(np.float32), + DT_I32: np.dtype(np.int32), +} + +NUMPY_TYPE_TO_DATA_TYPE: Dict['np.dtype[Any]', DataType] = \ + {dtype: data_type for (data_type, dtype) in DATA_TYPE_TO_NUMPY.items()} + + +class GGMLFileType(enum.Enum): + AllF32 = 0 + MostlyF16 = 1 # except 1d tensors + MostlyQ4_0 = 2 # except 1d tensors + MostlyQ4_1 = 3 # except 1d tensors + PerLayerIsQ4_1 = 4 # but tok_embeddings.weight and output.weight are F16 + + def type_for_tensor(self, name: str, tensor: 'LazyTensor') -> DataType: + if len(tensor.shape) == 1: + # 1D tensors are always F32. + return DT_F32 + elif self == GGMLFileType.AllF32: + return DT_F32 + elif self == GGMLFileType.MostlyF16: + return DT_F16 + elif self == GGMLFileType.MostlyQ4_0: + return DT_Q4_0 + elif self == GGMLFileType.MostlyQ4_1: + return DT_Q4_1 + elif self == GGMLFileType.PerLayerIsQ4_1: + if name in ('output.weight', 'tok_embeddings.weight'): + return DT_F16 + else: + return DT_Q4_1 + else: + raise ValueError(self) + + +def make_tensors_list() -> List[str]: + ret = [ + 'tok_embeddings.weight', + 'norm.weight', + 'output.weight', + ] + for i in range(80): # maximum number of layer + ret += [ + f'layers.{i}.attention.wq.weight', + f'layers.{i}.attention.wk.weight', + f'layers.{i}.attention.wv.weight', + f'layers.{i}.attention.wo.weight', + f'layers.{i}.attention_norm.weight', + f'layers.{i}.feed_forward.w1.weight', + f'layers.{i}.feed_forward.w2.weight', + f'layers.{i}.feed_forward.w3.weight', + f'layers.{i}.ffn_norm.weight', + ] + return ret + + +TENSORS_LIST = make_tensors_list() +TENSORS_SET = set(TENSORS_LIST) + + +@dataclass +class Params: + n_vocab: int + n_embd: int + n_mult: int + n_head: int + n_layer: int + file_type: GGMLFileType + + @staticmethod + def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params': + n_vocab, n_embd = model["tok_embeddings.weight"].shape + + return Params( + n_vocab=n_vocab, + n_embd=n_embd, + n_mult=256, + n_head=n_embd // 128, + n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model), + file_type=file_type, + ) + + +class SentencePieceVocab: + def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path]) -> None: + self.sentencepiece_tokenizer = SentencePieceProcessor(str(fname_tokenizer)) + added_tokens: Dict[str, int] + if fname_added_tokens is not None: + added_tokens = json.load(open(fname_added_tokens)) + else: + added_tokens = {} + vocab_size: int = self.sentencepiece_tokenizer.vocab_size() + expected_ids = list(range(vocab_size, vocab_size + len(added_tokens))) + actual_ids = sorted(added_tokens.values()) + if expected_ids != actual_ids: + raise Exception(f"Expected added token IDs to be sequential and start at {len(added_tokens)}; got {actual_ids}") + items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1]) + self.added_tokens_list = [text for (text, idx) in items] + self.vocab_size_base: int = vocab_size + self.vocab_size: int = self.vocab_size_base + len(self.added_tokens_list) + self.fname_tokenizer = fname_tokenizer + self.fname_added_tokens = fname_added_tokens + + def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]: + tokenizer = self.sentencepiece_tokenizer + for i in range(tokenizer.vocab_size()): + text: bytes + if tokenizer.is_unknown(i): + text = " \u2047 ".encode("utf-8") + elif tokenizer.is_control(i): + text = b"" + elif tokenizer.is_byte(i): + piece = tokenizer.id_to_piece(i) + if len(piece) != 6: + raise Exception(f"Invalid token: {piece}") + byte_value = int(piece[3:-1], 16) + text = struct.pack("B", byte_value) + else: + text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") + score: float = tokenizer.get_score(i) + yield text, score + + def added_tokens(self) -> Iterable[Tuple[bytes, float]]: + for text in self.added_tokens_list: + score = -1000.0 + yield text.encode("utf-8"), score + + def all_tokens(self) -> Iterable[Tuple[bytes, float]]: + yield from self.sentencepiece_tokens() + yield from self.added_tokens() + + def __repr__(self) -> str: + return f"" + + +class GGMLVocab: + def __init__(self, tokens: List[Tuple[bytes, float]]): + self.tokens = tokens + self.vocab_size = len(tokens) + + def all_tokens(self) -> Iterable[Tuple[bytes, float]]: + return self.tokens + + def __repr__(self) -> str: + return f"" + + +Vocab = Union[SentencePieceVocab, GGMLVocab] + + +def permute(weights: NDArray, n_head: int) -> NDArray: + return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:]) + .swapaxes(1, 2) + .reshape(weights.shape)) + + +def dequantize_q4(qvalues_pack32: NDArray, scales: NDArray, addends: Optional[NDArray], g_idx: Optional[NDArray]) -> NDArray: + # First reinterpret each row from a list of int32s containing 8 values each + # to a list of uint8s containing 2 values each. + qvalues_pack8 = qvalues_pack32.view(np.uint8) + + # Then split out the two values per int8 (which requires an actual + # conversion because numpy doesn't natively support int4s). + qvalues = np.zeros([qvalues_pack8.shape[0], qvalues_pack8.shape[1] * 2], dtype=np.uint8) + qvalues[:, 0::2] = qvalues_pack8 & 0xf + qvalues[:, 1::2] = qvalues_pack8 >> 4 + + assert addends is None or addends.shape == scales.shape + assert qvalues.shape[0] == scales.shape[0] + assert qvalues.shape[1] % scales.shape[1] == 0 + if g_idx is None: + repeat_count = qvalues.shape[1] // scales.shape[1] + scales = scales[:, :, np.newaxis] + if addends is not None: + addends = addends[:, :, np.newaxis] + # Reshape so that the below computation broadcasts over scales and addends: + qvalues.shape = (qvalues.shape[0], scales.shape[1], int(repeat_count)) + else: + # In this case the scale and addend is selected for each column by g_idx: + assert addends is not None + scales = scales[:, g_idx] + addends = addends[:, g_idx] + if addends is None: + # Q4_0 + qvalues = qvalues.view(np.int8) + qvalues -= 8 + # And do the actual 'value = scale * qvalue + addend' computation. + values = scales * qvalues + if addends is not None: + values += addends + if g_idx is None: + values.shape = (values.shape[0], values.shape[1] * values.shape[2]) + return values + + +class Tensor(metaclass=ABCMeta): + data_type: DataType + + @abstractmethod + def astype(self, data_type: DataType) -> 'Tensor': ... + @abstractmethod + def permute(self, n_head: int) -> 'Tensor': ... + @abstractmethod + def to_ggml(self) -> 'GGMLCompatibleTensor': ... + + +def bf16_to_fp32(bf16_arr: np.ndarray) -> np.ndarray: + assert bf16_arr.dtype == np.uint16, f"Input array should be of dtype uint16, but got {bf16_arr.dtype}" + fp32_arr = bf16_arr.astype(np.uint32) << 16 + return fp32_arr.view(np.float32) + + +class UnquantizedTensor(Tensor): + def __init__(self, ndarray: NDArray) -> None: + assert isinstance(ndarray, np.ndarray) + self.ndarray = ndarray + self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype] + + def astype(self, data_type: DataType) -> Tensor: + dtype = DATA_TYPE_TO_NUMPY[data_type] + if self.data_type == DT_BF16: + self.ndarray = bf16_to_fp32(self.ndarray) + return UnquantizedTensor(self.ndarray.astype(dtype)) + + def to_ggml(self) -> 'UnquantizedTensor': + return self + + def permute(self, n_head: int) -> 'UnquantizedTensor': + return UnquantizedTensor(permute(self.ndarray, n_head)) + + +def load_unquantized(lazy_tensor: 'LazyTensor', expected_dtype: Any = None, convert: bool = False) -> NDArray: + tensor = lazy_tensor.load() + assert isinstance(tensor, UnquantizedTensor) + + # double-check: + actual_shape = list(tensor.ndarray.shape) + assert actual_shape == lazy_tensor.shape, (actual_shape, lazy_tensor.shape) + if expected_dtype is not None and expected_dtype != tensor.ndarray.dtype: + if convert: + tensor.ndarray = tensor.ndarray.astype(expected_dtype) + else: + raise ValueError(f'expected this tensor to have dtype {expected_dtype}, got {tensor.ndarray.dtype}') + + return tensor.ndarray + + +class GGMLQuantizedTensor(Tensor): + data_type: QuantizedDataType + + def __init__(self, ndarray: NDArray, shape: List[int], data_type: DataType) -> None: + rows, columns = shape + assert data_type in (DT_Q4_1, DT_Q4_0) # for now + assert isinstance(data_type, QuantizedDataType) # redundant, but mypy complains without this + assert columns % data_type.groupsize == 0 + words_in_block = 6 if data_type == DT_Q4_1 else 5 + self.ndarray = ndarray.view(dtype=np.uint32).reshape((rows, columns // data_type.groupsize, words_in_block)) + self.shape = shape[:] + self.data_type = data_type + + def astype(self, data_type: DataType) -> Tensor: + if data_type == self.data_type: + return self + scales = self.ndarray[:, :, 0].view(np.float32) + if self.data_type.have_addends: + addends = self.ndarray[:, :, 1].view(np.float32) + else: + addends = None + qweights = self.ndarray[:, :, -4:].reshape([self.shape[0], self.shape[1] // 8]) + + dq = dequantize_q4(qweights, scales, addends, g_idx=None) + return UnquantizedTensor(dq).astype(data_type) + + def to_ggml(self) -> 'GGMLQuantizedTensor': + return self + + def permute(self, n_head: int) -> 'GGMLQuantizedTensor': + return GGMLQuantizedTensor(permute(self.ndarray, n_head), self.shape, self.data_type) + + +GGMLCompatibleTensor = Union[UnquantizedTensor, GGMLQuantizedTensor] + + +class DeferredPermutedTensor(Tensor): + def __init__(self, base: Tensor, n_head: int) -> None: + self.base = base + self.n_head = n_head + self.data_type = self.base.data_type + + def astype(self, data_type: DataType) -> Tensor: + return self.base.astype(data_type).permute(self.n_head) + + def to_ggml(self) -> GGMLCompatibleTensor: + return self.base.to_ggml().permute(self.n_head) + + def permute(self, n_head: int) -> Tensor: + raise Exception("shouldn't permute twice") + + +class GPTQForLLaMaQuantizedTensor(Tensor): + def __init__(self, model: 'LazyModel', namebase: str) -> None: + qweight = load_unquantized(model[f"{namebase}.qweight"], np.int32) + scales = load_unquantized(model[f"{namebase}.scales"], np.float32, convert=True) + + bias = model.get(f"{namebase}.bias") + if bias is not None: + # Q4_1 does not support bias; good thing the bias is always all zeros. + assert not np.any(load_unquantized(bias)) + + if f"{namebase}.zeros" in model: + zeros = load_unquantized(model[f"{namebase}.zeros"], np.float32) + else: + qzeros = load_unquantized(model[f"{namebase}.qzeros"], np.int32) + assert qzeros.dtype == np.int32 + zeros = dequantize_q4(qzeros, scales, scales, g_idx=None) + assert zeros.dtype == np.float32 + + assert zeros.shape == scales.shape + + # Output is transposed compared to the input, and addends have their sign flipped. + # Scales and zeros similarly must be transposed but only for newer + # versions of GPTQ-for-LLaMa; the older versions can be identified by + # having shape (n_embd, 1). + qweight = qweight.T + if scales.shape[1] != 1: + scales = scales.T + zeros = zeros.T + + # Output also has signs flipped for the addends. + self.qweight = qweight + self.scales = scales + self.addends = -zeros + + self.g_idx: Optional[NDArray] + if f"{namebase}.g_idx" in model: + self.g_idx = load_unquantized(model[f"{namebase}.g_idx"], np.int32) + assert self.g_idx.shape == (qweight.shape[1] * 8,) + else: + self.g_idx = None + + self.shape = [self.qweight.shape[0], self.qweight.shape[1] * 8] + self.data_type = QuantizedDataType(groupsize=self.groupsize(), have_addends=True, + have_g_idx=(self.g_idx is not None)) + + def inspect(self, row: int, col: int) -> None: + '''For debugging.''' + qweight = (self.qweight[row, col // 8] >> (4 * (col & 7))) & 0xf + if self.g_idx is not None: + group = self.g_idx[col] + else: + group = int(col // self.groupsize()) + scale = self.scales[row, group] + addend = self.addends[row, group] + with np.printoptions(precision=None, suppress=True): + print(f'scale:{scale} addend:{addend} qweight:{qweight}') + print('possible values:', np.arange(16) * scale + addend) + print('actual value:', qweight * scale + addend) + + def astype(self, data_type: DataType) -> Tensor: + if isinstance(data_type, QuantizedDataType): + assert self.g_idx is None and data_type.have_addends is True and data_type.have_g_idx is False + return self.regroup(data_type.groupsize) + + dequantized = dequantize_q4(np.ascontiguousarray(self.qweight), self.scales, self.addends, self.g_idx) + return UnquantizedTensor(dequantized).astype(data_type) + + def groupsize(self) -> int: + assert self.addends.shape == self.scales.shape + assert self.shape[1] % self.scales.shape[1] == 0 + return self.shape[1] // self.scales.shape[1] + + def regroup(self, new_groupsize: int = 32) -> 'GPTQForLLaMaQuantizedTensor': + # Old versions of GPTQ-for-LLaMa shared scales and addends between all the + # columns in a row. Newer versions share them between every set of N + # columns in a row, where N is the `groupsize` parameter, usually 128. The + # output format shares them between every set of 32 columns. To handle + # this, duplicate scales and addends for every smaller group. + # (In the above, 'row' and 'column' are in the sense of the output.) + assert self.g_idx is None + old_groupsize = self.groupsize() + assert old_groupsize >= new_groupsize and old_groupsize % new_groupsize == 0, old_groupsize + ret = copy.copy(self) + ret.addends = self.addends.repeat(old_groupsize // new_groupsize, axis=1) + ret.scales = self.scales.repeat(old_groupsize // new_groupsize, axis=1) + ret.data_type = QuantizedDataType(groupsize=new_groupsize, have_addends=True, have_g_idx=False) + return ret + + def permute(self, n_head: int) -> Tensor: + return DeferredPermutedTensor(self, n_head) + + def to_ggml(self) -> GGMLQuantizedTensor: + # The output format looks like this: + # For each row: + # For each group of 32 columns: + # - addend (float32, 4 bytes) + # - scale (float32, 4 bytes) + # - weights (int4 * 32, 16 bytes) + + if self.groupsize() != 32: + raise Exception("should have been regrouped before converting to ggml") + + # Since the output format is mixed between integers and floats, we have + # to hackily view the floats as int32s just so numpy will let us + # concatenate them. + addends_view = self.addends.view(dtype=np.int32)[:, :, np.newaxis] + scales_view = self.scales.view(dtype=np.int32)[:, :, np.newaxis] + + # Split into groups of 4 columns (i.e. 32 columns of quantized data): + grouped = self.qweight.reshape([self.qweight.shape[0], self.qweight.shape[1] // 4, 4]) + + # And concatenate: + grouped = np.concatenate([scales_view, addends_view, grouped], axis=2, casting='no') + + return GGMLQuantizedTensor(grouped, self.shape, DT_Q4_1) + + +@dataclass +class LazyTensor: + _load: Callable[[], Tensor] + shape: List[int] + data_type: DataType + description: str + + def load(self) -> Tensor: + ret = self._load() + assert ret.data_type == self.data_type, (self.data_type, ret.data_type, self.description) + return ret + + def astype(self, data_type: DataType) -> 'LazyTensor': + self.validate_conversion_to(data_type) + + def load() -> Tensor: + return self.load().astype(data_type) + return LazyTensor(load, self.shape, data_type, f'convert({data_type}) {self.description}') + + def validate_conversion_to(self, data_type: DataType) -> None: + if data_type == self.data_type: + return + if isinstance(data_type, QuantizedDataType): + if not isinstance(self.data_type, QuantizedDataType): + raise Exception(f"Can't turn an unquantized tensor into a quantized type ({data_type})") + if self.data_type.have_g_idx: + sys.stderr.write( + "Error: Input uses the newer GPTQ-for-LLaMa format (using g_idx), " + "which is not yet natively supported by GGML. " + "For now you can still convert this model by passing `--outtype f16` to dequantize, " + "but that will result in a much larger output file for no quality benefit.\n") + sys.exit(1) + assert not data_type.have_g_idx and self.data_type.have_addends and data_type.have_addends + + +LazyModel = Dict[str, LazyTensor] + + +@dataclass +class ModelPlus: + model: LazyModel + paths: List[Path] # Where this was read from. + format: Literal['ggml', 'torch', 'safetensors'] + vocab: Optional[Vocab] # For GGML models (which have vocab built in), the vocab. + + +def merge_sharded(models: List[LazyModel]) -> LazyModel: + # Original LLaMA models have each file contain one part of each tensor. + # Use a dict instead of a set to preserve order. + names = {name: None for model in models for name in model} + + def convert(name: str) -> LazyTensor: + lazy_tensors: List[LazyTensor] = [model[name] for model in models] + if len(lazy_tensors) == 1: + # only one file; don't go through this procedure since there might + # be quantized tensors + return lazy_tensors[0] + if len(lazy_tensors[0].shape) == 1: + # the tensor is just duplicated in every file + return lazy_tensors[0] + if name.startswith('tok_embeddings.') or \ + name.endswith('.attention.wo.weight') or \ + name.endswith('.feed_forward.w2.weight'): + # split by columns + axis = 1 + else: + # split by rows + axis = 0 + concatenated_shape = list(lazy_tensors[0].shape) + concatenated_shape[axis] = sum(tensor.shape[axis] for tensor in lazy_tensors) + + def load() -> UnquantizedTensor: + ndarrays = [load_unquantized(tensor) for tensor in lazy_tensors] + concatenated: NDArray = np.concatenate(ndarrays, axis=axis) + return UnquantizedTensor(concatenated) + description = 'concatenated[[' + '] | ['.join(lt.description for lt in lazy_tensors) + ']]' + return LazyTensor(load, concatenated_shape, lazy_tensors[0].data_type, description) + return {name: convert(name) for name in names} + + +def merge_multifile_models(models_plus: List[ModelPlus]) -> ModelPlus: + formats = set(mp.format for mp in models_plus) + assert len(formats) == 1, "different formats?" + format = formats.pop() + paths = [path for mp in models_plus for path in mp.paths] + # Use the first non-None vocab, if any. + try: + vocab = next(mp.vocab for mp in models_plus if mp.vocab is not None) + except StopIteration: + vocab = None + + if any("model.embed_tokens.weight" in mp.model for mp in models_plus): + # Transformers models put different tensors in different files, but + # don't split indivdual tensors between files. + model: LazyModel = {} + for mp in models_plus: + model.update(mp.model) + else: + model = merge_sharded([mp.model for mp in models_plus]) + + return ModelPlus(model, paths, format, vocab) + + +def permute_lazy(lazy_tensor: LazyTensor, n_head: int) -> LazyTensor: + def load() -> Tensor: + return lazy_tensor.load().permute(n_head) + return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description) + + +def convert_transformers_to_orig(model: LazyModel) -> LazyModel: + out: LazyModel = {} + out["tok_embeddings.weight"] = model["model.embed_tokens.weight"] + out["norm.weight"] = model["model.norm.weight"] + out["output.weight"] = model["lm_head.weight"] + + n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128 + for i in itertools.count(): + if f"model.layers.{i}.self_attn.q_proj.weight" not in model: + break + out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], n_head) + out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], n_head) + out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"] + out[f"layers.{i}.attention.wo.weight"] = model[f"model.layers.{i}.self_attn.o_proj.weight"] + + out[f"layers.{i}.feed_forward.w1.weight"] = model[f"model.layers.{i}.mlp.gate_proj.weight"] + out[f"layers.{i}.feed_forward.w2.weight"] = model[f"model.layers.{i}.mlp.down_proj.weight"] + out[f"layers.{i}.feed_forward.w3.weight"] = model[f"model.layers.{i}.mlp.up_proj.weight"] + + out[f"layers.{i}.attention_norm.weight"] = model[f"model.layers.{i}.input_layernorm.weight"] + out[f"layers.{i}.ffn_norm.weight"] = model[f"model.layers.{i}.post_attention_layernorm.weight"] + return out + + +def handle_quantization(model: LazyModel) -> LazyModel: + '''Convert a model with entries for 'foo.qweight', 'foo.scales', etc. + (which resolve to UnquantizedTensors with the raw data) to one with entries + for 'foo.weight' (which resolve to QuantizedTensors). + ''' + def convert(name: str) -> Tuple[str, LazyTensor]: + if name.endswith(".qweight"): + namebase = name.rsplit('.', 1)[0] + orig_name = namebase + ".weight" + + lazy_tensor = model[name] + assert len(lazy_tensor.shape) == 2 + real_shape = [lazy_tensor.shape[1], lazy_tensor.shape[0] * 8] + + # Calculate type. This replicates the logic in + # GPTQForLLaMaQuantizedTensor (which is executed when the modelis + # actually loaded). + lazy_scales = model[f"{namebase}.scales"] + scales_width = 1 if lazy_scales.shape[1] == 1 else lazy_scales.shape[0] + assert real_shape[1] % scales_width == 0 + groupsize = real_shape[1] // scales_width + have_g_idx = f"{namebase}.g_idx" in model + data_type = QuantizedDataType(groupsize=groupsize, have_addends=True, have_g_idx=have_g_idx) + + def load() -> Tensor: + return GPTQForLLaMaQuantizedTensor(model, namebase) + + return (orig_name, LazyTensor(load, real_shape, data_type, '[quantized]')) + else: + return (name, model[name]) + return dict(convert(name) for name in model) + +# Functionality that simulates `torch.load` but where individual tensors are +# only loaded into memory on demand, not all at once. +# PyTorch can't do this natively as of time of writing: +# - https://github.com/pytorch/pytorch/issues/64327 +# This allows us to de-shard without multiplying RAM usage, and also +# conveniently drops the PyTorch dependency (though we still need numpy). + + +@dataclass +class LazyStorageKind: + data_type: DataType + + +@dataclass +class LazyStorage: + load: Callable[[int, int], NDArray] + kind: LazyStorageKind + description: str + + +class LazyUnpickler(pickle.Unpickler): + def __init__(self, fp: IO[bytes], data_base_path: str, zip_file: zipfile.ZipFile): + super().__init__(fp) + self.data_base_path = data_base_path + self.zip_file = zip_file + + def persistent_load(self, pid: Any) -> Any: + assert pid[0] == 'storage' + assert isinstance(pid[1], LazyStorageKind) + data_type = pid[1].data_type + filename_stem = pid[2] + filename = self.data_base_path + '/' + filename_stem + info = self.zip_file.getinfo(filename) + + def load(offset: int, elm_count: int) -> NDArray: + dtype = DATA_TYPE_TO_NUMPY.get(data_type) + if dtype is None: + raise Exception("tensor stored in unsupported format") + fp = self.zip_file.open(info) + fp.seek(offset * dtype.itemsize) + size = elm_count * dtype.itemsize + data = fp.read(size) + assert len(data) == size + return np.frombuffer(data, dtype) + description = f'storage data_type={data_type} path-in-zip={filename} path={self.zip_file.filename}' + return LazyStorage(load=load, kind=pid[1], description=description) + + # @staticmethod + def lazy_rebuild_tensor_v2(storage: Any, storage_offset: Any, size: Any, stride: Any, + # pyright: ignore[reportSelfClsParameterName] + requires_grad: Any, backward_hooks: Any, metadata: Any = None) -> LazyTensor: + assert isinstance(storage, LazyStorage) + + def load() -> UnquantizedTensor: + elm_count = stride[0] * size[0] + return UnquantizedTensor(storage.load(storage_offset, elm_count).reshape(size)) + description = f'pickled storage_offset={storage_offset} in {storage.description}' + return LazyTensor(load, list(size), storage.kind.data_type, description) + + # @staticmethod + def rebuild_from_type_v2(func, new_type, args, state): + return func(*args) + + CLASSES: Dict[Any, Any] = { + ('torch._tensor', '_rebuild_from_type_v2'): rebuild_from_type_v2, + ('torch._utils', '_rebuild_tensor_v2'): lazy_rebuild_tensor_v2, + ('torch', 'BFloat16Storage'): LazyStorageKind(DT_BF16), + ('torch', 'HalfStorage'): LazyStorageKind(DT_F16), + ('torch', 'FloatStorage'): LazyStorageKind(DT_F32), + ('torch', 'IntStorage'): LazyStorageKind(DT_I32), + ('torch', 'Tensor'): LazyTensor, + } + + def find_class(self, module: str, name: str) -> Any: + if not module.startswith('torch'): + return super().find_class(module, name) + return self.CLASSES[(module, name)] + + +def lazy_load_torch_file(outer_fp: IO[bytes], path: Path) -> ModelPlus: + zf = zipfile.ZipFile(outer_fp) + pickle_paths = [name for name in zf.namelist() if name.endswith('.pkl')] + assert len(pickle_paths) == 1, pickle_paths + pickle_fp = zf.open(pickle_paths[0], 'r') + unpickler = LazyUnpickler(pickle_fp, + data_base_path=pickle_paths[0][:-4], + zip_file=zf) + model = unpickler.load() + as_dict = dict(model.items()) + return ModelPlus(model=as_dict, paths=[path], format='torch', vocab=None) + + +SAFETENSORS_DATA_TYPES: Dict[str, DataType] = { + 'F16': DT_F16, + 'F32': DT_F32, + 'I32': DT_I32, +} + + +def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus: + header_size, = struct.unpack(' LazyTensor: + data_type = SAFETENSORS_DATA_TYPES[info['dtype']] + numpy_dtype = DATA_TYPE_TO_NUMPY[data_type] + shape: List[int] = info['shape'] + begin, end = info['data_offsets'] + assert 0 <= begin <= end <= len(byte_buf) + assert end - begin == math.prod(shape) * numpy_dtype.itemsize + buf = byte_buf[begin:end] + + def load() -> UnquantizedTensor: + return UnquantizedTensor(np.frombuffer(buf, dtype=numpy_dtype).reshape(shape)) + description = f'safetensors begin={begin} end={end} type={data_type} path={path}' + return LazyTensor(load, shape, data_type, description) + model = {name: convert(info) for (name, info) in header.items() if name != '__metadata__'} + return ModelPlus(model=model, paths=[path], format='safetensors', vocab=None) + + +def must_read(fp: IO[bytes], length: int) -> bytes: + ret = fp.read(length) + if len(ret) < length: + raise Exception("unexpectedly reached end of file") + return ret + + +def lazy_load_ggml_file(fp: io.BufferedReader, path: Path) -> ModelPlus: + magic = must_read(fp, 4)[::-1] + if magic in (b'ggmf', b'ggjt'): + version, = struct.unpack("i", must_read(fp, 4)) + assert version == 1 + else: + assert magic == b'ggml' + version = None + n_vocab, n_embd, n_mult, n_head, n_layer, rot, file_type = struct.unpack('<7i', must_read(fp, 28)) + + tokens: List[Tuple[bytes, float]] = [] + for i in range(n_vocab): + if i == 32000: + # HACK: GPT4All messed with the format without changing the magic + # number. Specifically, they changed the vocab section to contain + # `n_vocab - 1` tokens instead of `n_vocab` (i.e. omitting the + # extra pad token). Try to detect if we're reading a file like + # this. + orig_pos = fp.tell() + fp.seek(20, io.SEEK_CUR) + is_gpt4all = fp.read(21) == b'tok_embeddings.weight' + fp.seek(orig_pos) + if is_gpt4all: + break + + length, = struct.unpack("i", must_read(fp, 4)) + text = must_read(fp, length) + if magic != b'ggml': + score, = struct.unpack("f", must_read(fp, 4)) + tokens.append((text, score)) + vocab = GGMLVocab(tokens) if magic != b'ggml' else None + + model: LazyModel = {} + # Use mmap for the actual data to avoid race conditions with the file offset. + off = fp.raw.tell() + mapped = memoryview(mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)) + fp.raw.seek(off) # needed on Windows + + def read_tensor() -> None: # this is a function so that variables captured in `load` don't change + shape_len, name_len, ftype = struct.unpack("iii", must_read(fp, 12)) + assert 0 <= shape_len <= 3 + shape: List[int] = list(struct.unpack(f"{shape_len}i", must_read(fp, 4 * shape_len))) + shape = shape[::-1] + name = must_read(fp, name_len).decode('utf-8') + data_type = FTYPE_TO_DATA_TYPE[ftype] + + if magic == b'ggjt': + fp.seek((fp.tell() + 31) & -32) + + if data_type == DT_Q4_1: + # See GPTQForLLaMaQuantizedTensor.ggml_ndarray() + size = 24 * (shape[1] // 32) * shape[0] + elif data_type == DT_Q4_0: + size = 20 * (shape[1] // 32) * shape[0] + else: + numpy_dtype = DATA_TYPE_TO_NUMPY[data_type] + elm_count = math.prod(shape) + size = elm_count * numpy_dtype.itemsize + offset = fp.tell() + buf = mapped[offset:offset+size] + fp.seek(size, io.SEEK_CUR) + + def load() -> Tensor: + if isinstance(data_type, QuantizedDataType): + ndarray = np.frombuffer(buf, dtype=np.uint32) + return GGMLQuantizedTensor(ndarray, shape, data_type) + else: + return UnquantizedTensor(np.frombuffer(buf, dtype=numpy_dtype).reshape(shape)) + description = f'ggml offset={offset} type={data_type} path={path}' + model[name] = LazyTensor(load, shape, data_type, description) + + while fp.read(1) != b'': + fp.seek(-1, io.SEEK_CUR) + read_tensor() + + return ModelPlus(model=model, paths=[path], format='ggml', vocab=vocab) + + +@functools.lru_cache(maxsize=None) +def lazy_load_file(path: Path) -> ModelPlus: + fp = open(path, 'rb') + first8 = fp.read(8) + fp.seek(0) + if first8[:2] == b'PK': + # A zip file, i.e. PyTorch format + return lazy_load_torch_file(fp, path) + elif first8[2:4] == b'gg': + # GGML format + return lazy_load_ggml_file(fp, path) + elif struct.unpack(' Iterable[Out]: + '''Parallel map, but with backpressure. If the caller doesn't call `next` + fast enough, this will stop calling `func` at some point rather than + letting results pile up in memory. Specifically, there is a max of one + output value buffered per thread.''' + with concurrent.futures.ThreadPoolExecutor() as executor: + futures: List[concurrent.futures.Future[Out]] = [] + items_rev = list(iterable)[::-1] + for i in range(min(concurrency, len(items_rev))): + futures.append(executor.submit(func, items_rev.pop())) + while futures: + result = futures.pop(0).result() + if items_rev: + futures.append(executor.submit(func, items_rev.pop())) + yield result + + +def check_vocab_size(params: Params, vocab: Vocab) -> None: + if params.n_vocab != vocab.vocab_size: + # GGMLVocab comes from the same file as the model so shouldn't mismatch: + assert isinstance(vocab, SentencePieceVocab) + if params.n_vocab == vocab.vocab_size_base: + print("Ignoring added_tokens.json since model matches vocab size without it.") + vocab.added_tokens_list = [] + vocab.vocab_size = vocab.vocab_size_base + return + msg = f"Vocab size mismatch (model has {params.n_vocab}, but {vocab.fname_tokenizer}" + if vocab.fname_added_tokens is not None: + msg += f" combined with {vocab.fname_added_tokens}" + msg += f" has {vocab.vocab_size})." + if vocab.vocab_size < params.n_vocab < vocab.vocab_size + 20 and vocab.fname_added_tokens is None: + msg += f" Most likely you are missing added_tokens.json (should be in {vocab.fname_tokenizer.parent})." + raise Exception(msg) + + +class OutputFile: + def __init__(self, fname_out: Path) -> None: + self.fout = open(fname_out, "wb") + + def write_file_header(self, params: Params) -> None: + self.fout.write(b"ggjt"[::-1]) # magic + values = [ + 1, # file version + params.n_vocab, + params.n_embd, + params.n_mult, + params.n_head, + params.n_layer, + params.n_embd // params.n_head, # rot (obsolete) + params.file_type.value, + ] + self.fout.write(struct.pack("i" * len(values), *values)) + + def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None: + sname = name.encode('utf-8') + self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type])) + self.fout.write(struct.pack("i" * len(shape), *shape[::-1])) + self.fout.write(sname) + self.fout.seek((self.fout.tell() + 31) & -32) + + def write_vocab(self, vocab: Vocab) -> None: + for text, score in vocab.all_tokens(): + self.fout.write(struct.pack("i", len(text))) + self.fout.write(text) + self.fout.write(struct.pack("f", score)) + + @staticmethod + def write_vocab_only(fname_out: Path, vocab: Vocab) -> None: + of = OutputFile(fname_out) + params = Params(n_vocab=vocab.vocab_size, n_embd=0, n_mult=0, + n_head=1, n_layer=0, file_type=GGMLFileType.AllF32) + of = OutputFile(fname_out) + of.write_file_header(params) + of.write_vocab(vocab) + of.fout.close() + + @staticmethod + def write_all(fname_out: Path, params: Params, model: LazyModel, vocab: Vocab) -> None: + check_vocab_size(params, vocab) + of = OutputFile(fname_out) + of.write_file_header(params) + print("Writing vocab...") + of.write_vocab(vocab) + + def do_item(item: Tuple[str, LazyTensor]) -> NDArray: + name, lazy_tensor = item + return lazy_tensor.load().to_ggml().ndarray + + ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8) + for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)): + size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape) + padi = len(str(len(model))) + print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}") + of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type) + ndarray.tofile(of.fout) + of.fout.close() + + +def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFileType: + wq_type = model["layers.0.attention.wq.weight"].data_type + if output_type_str == "f32" or (output_type_str is None and wq_type in (DT_F32, DT_BF16)): + return GGMLFileType.AllF32 + if output_type_str == "f16" or (output_type_str is None and wq_type == DT_F16): + return GGMLFileType.MostlyF16 + if output_type_str == "q4_1" or (output_type_str is None and isinstance(wq_type, QuantizedDataType) and + wq_type.have_addends): + if isinstance(model["output.weight"].data_type, QuantizedDataType): + return GGMLFileType.MostlyQ4_1 + else: + return GGMLFileType.PerLayerIsQ4_1 + if output_type_str == "q4_0" or (output_type_str is None and isinstance(wq_type, QuantizedDataType)): + return GGMLFileType.MostlyQ4_0 + name_to_type = {name: lazy_tensor.data_type for (name, lazy_tensor) in model.items()} + raise Exception(f"Unexpected combination of types: {name_to_type}") + + +def do_necessary_conversions(model: LazyModel) -> LazyModel: + model = handle_quantization(model) + + if "lm_head.weight" in model: + model = convert_transformers_to_orig(model) + model = filter_and_sort_tensors(model) + + return model + + +def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel: + return {name: tensor.astype(output_type.type_for_tensor(name, tensor)) + for (name, tensor) in model.items()} + + +def nth_multifile_path(path: Path, n: int) -> Optional[Path]: + '''Given any path belonging to a multi-file model (e.g. foo.bin.1), return + the nth path in the model. + ''' + # Support the following patterns: + patterns: List[Tuple[str, str]] = [ + # - x.00.pth, x.01.pth, etc. + (r'\.[0-9]{2}\.pth$', f'.{n:02}.pth'), + # - x-00001-of-00002.bin, x-00002-of-00002.bin, etc. + (r'-[0-9]{5}-of-(.*)$', fr'-{n:05}-of-\1'), + # x.bin, x.bin.1, etc. + (r'(\.[0-9]+)?$', r'\1' if n == 0 else fr'\1.{n}') + ] + for regex, replacement in patterns: + if re.search(regex, path.name): + new_path = path.with_name(re.sub(regex, replacement, path.name)) + if new_path.exists(): + return new_path + return None + + +def find_multifile_paths(path: Path) -> List[Path]: + '''Given any path belonging to a multi-file model (e.g. foo.bin.1), return + the whole list of paths in the model. + ''' + ret: List[Path] = [] + for i in itertools.count(): + nth_path = nth_multifile_path(path, i) + if nth_path is None: + break + ret.append(nth_path) + if not ret: + # No matches. This should only happen if the file was named, e.g., + # foo.0, and there was no file named foo. Oh well, try to process it + # as a single file. + return [path] + return ret + + +def load_some_model(path: Path) -> ModelPlus: + '''Load a model of any supported format.''' + # Be extra-friendly and accept either a file or a directory: + if path.is_dir(): + # Check if it's a set of safetensors files first + files = list(path.glob("model-00001-of-*.safetensors")) + if not files: + # Try the PyTorch patterns too, with lower priority + globs = ["consolidated.00.pth", "pytorch_model-00001-of-*.bin", "*.pt", "pytorch_model.bin"] + files = [file for glob in globs for file in path.glob(glob)] + if not files: + # Try GGML too, but with lower priority, since if both a non-GGML + # model and a GGML model exist in the same directory, we assume the + # latter was converted from the former. + files = list(path.glob("ggml-model*.bin*")) + if not files: + raise Exception(f"Can't find model in directory {path}") + if len(files) > 1: + raise Exception(f"Found multiple models in {path}, not sure which to pick: {files}") + path = files[0] + + paths = find_multifile_paths(path) + models_plus: List[ModelPlus] = [] + for path in paths: + print(f"Loading model file {path}") + models_plus.append(lazy_load_file(path)) + + model_plus = merge_multifile_models(models_plus) + return model_plus + + +def filter_and_sort_tensors(model: LazyModel) -> LazyModel: + return {name: model[name] for name in TENSORS_LIST if name in model} + + +def load_vocab(path: Path) -> SentencePieceVocab: + # Be extra-friendly and accept either a file or a directory. Also, if it's + # a directory, it might be the model directory, and tokenizer.model might + # be in the parent of that. + if path.is_dir(): + path2 = path / "tokenizer.model" + # Use `.parent` instead of /.. to handle the symlink case better. + path3 = path.parent / "tokenizer.model" + if path2.exists(): + path = path2 + elif path3.exists(): + path = path3 + else: + raise FileNotFoundError( + f"Could not find tokenizer.model in {path} or its parent; " + "if it's in another directory, pass the directory as --vocab-dir") + added_tokens_path = path.parent / "added_tokens.json" + print(f"Loading vocab file {path}") + return SentencePieceVocab(path, added_tokens_path if added_tokens_path.exists() else None) + + +def default_outfile(model_paths: List[Path], params: Params) -> Path: + namestr = { + GGMLFileType.AllF32: "f32", + GGMLFileType.MostlyF16: "f16", + GGMLFileType.MostlyQ4_0: "q4_0", + GGMLFileType.MostlyQ4_1: "q4_1", + GGMLFileType.PerLayerIsQ4_1: "q4_1", + }[params.file_type] + ret = model_paths[0].parent / f"ggml-model-{namestr}.bin" + if ret in model_paths: + sys.stderr.write( + f"Error: Default output path ({ret}) would overwrite the input. " + "Please explicitly specify a path using --outfile.\n") + sys.exit(1) + return ret + + +def do_dump_model(model_plus: ModelPlus) -> None: + print(f"model_plus.paths = {model_plus.paths!r}") + print(f"model_plus.format = {model_plus.format!r}") + print(f"model_plus.vocab = {model_plus.vocab!r}") + for name, lazy_tensor in model_plus.model.items(): + print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") + + +def main(args_in: Optional[List[str]] = None) -> None: + parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file") + parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model") + parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file") + parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab") + parser.add_argument("--outtype", choices=["f32", "f16", "q4_1", "q4_0"], help="output format (default: based on input)") + parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file") + parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input") + parser.add_argument("model", type=Path, + help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)") + args = parser.parse_args(args_in) + + vocab: Vocab + if args.dump_single: + model_plus = lazy_load_file(args.model) + do_dump_model(model_plus) + elif args.vocab_only: + vocab = load_vocab(args.vocab_dir or args.model) + assert args.outfile, "need --outfile if using --vocab-only" + outfile = args.outfile + OutputFile.write_vocab_only(outfile, vocab) + print(f"Wrote {outfile}") + else: + model_plus = load_some_model(args.model) + if args.dump: + do_dump_model(model_plus) + return + if model_plus.vocab is not None and args.vocab_dir is None: + vocab = model_plus.vocab + else: + vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent + vocab = load_vocab(vocab_dir) + model = model_plus.model + model = do_necessary_conversions(model) + output_type = pick_output_type(model, args.outtype) + model = convert_to_output_type(model, output_type) + params = Params.guessed(model, output_type) + outfile = args.outfile or default_outfile(model_plus.paths, params) + OutputFile.write_all(outfile, params, model, vocab) + print(f"Wrote {outfile}") + + +if __name__ == '__main__': + main() diff --git a/ctransformers/models/submodules/ggllm.cpp/docs/BLIS.md b/ctransformers/models/submodules/ggllm.cpp/docs/BLIS.md new file mode 100644 index 0000000000000000000000000000000000000000..9b3c3060515dbfdc4e942b5d621fef68bf4c38e5 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/docs/BLIS.md @@ -0,0 +1,67 @@ +BLIS Installation Manual +------------------------ + +BLIS is a portable software framework for high-performance BLAS-like dense linear algebra libraries. It has received awards and recognition, including the 2023 James H. Wilkinson Prize for Numerical Software and the 2020 SIAM Activity Group on Supercomputing Best Paper Prize. BLIS provides a new BLAS-like API and a compatibility layer for traditional BLAS routine calls. It offers features such as object-based API, typed API, BLAS and CBLAS compatibility layers. + +Project URL: https://github.com/flame/blis + +### Prepare: + +Compile BLIS: + +```bash +git clone https://github.com/flame/blis +cd blis +./configure --enable-cblas -t openmp,pthreads auto +# will install to /usr/local/ by default. +make -j +``` + +Install BLIS: + +```bash +sudo make install +``` + +We recommend using openmp since it's easier to modify the cores been used. + +### llama.cpp compilation + +Makefile: + +```bash +make LLAMA_BLIS=1 -j +# make LLAMA_BLIS=1 benchmark-matmult +``` + +CMake: + +```bash +mkdir build +cd build +cmake -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=FLAME .. +make -j +``` + +### llama.cpp execution + +According to the BLIS documentation, we could set the following +environment variables to modify the behavior of openmp: + +``` +export GOMP_GPU_AFFINITY="0-19" +export BLIS_NUM_THREADS=14 +``` + +And then run the binaries as normal. + + +### Intel specific issue + +Some might get the error message saying that `libimf.so` cannot be found. +Please follow this [stackoverflow page](https://stackoverflow.com/questions/70687930/intel-oneapi-2022-libimf-so-no-such-file-or-directory-during-openmpi-compila). + +### Reference: + +1. https://github.com/flame/blis#getting-started +2. https://github.com/flame/blis/blob/master/docs/Multithreading.md diff --git a/ctransformers/models/submodules/ggllm.cpp/docs/token_generation_performance_tips.md b/ctransformers/models/submodules/ggllm.cpp/docs/token_generation_performance_tips.md new file mode 100644 index 0000000000000000000000000000000000000000..69ba6173c0c26b18a5cc6e915f369a090eeefecc --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/docs/token_generation_performance_tips.md @@ -0,0 +1,40 @@ +# Token generation performance troubleshooting + +## Verifying that the model is running on the GPU with cuBLAS +Make sure you compiled llama with the correct env variables according to [this guide](../README.md#cublas), so that llama accepts the `-ngl N` (or `--n-gpu-layers N`) flag. When running llama, you may configure `N` to be very large, and llama will offload the maximum possible number of layers to the GPU, even if it's less than the number you configured. For example: +```shell +./main -m "path/to/model.bin" -ngl 200000 -p "Please sir, may I have some " +``` + +When running llama, before it starts the inference work, it will output diagnostic information that shows whether cuBLAS is offloading work to the GPU. Look for these lines: +```shell +llama_model_load_internal: [cublas] offloading 60 layers to GPU +llama_model_load_internal: [cublas] offloading output layer to GPU +llama_model_load_internal: [cublas] total VRAM used: 17223 MB +... rest of inference +``` + +If you see these lines, then the GPU is being used. + +## Verifying that the CPU is not oversaturated +llama accepts a `-t N` (or `--threads N`) parameter. It's extremely important that this parameter is not too large. If your token generation is extremely slow, try setting this number to 1. If this significantly improves your token generation speed, then your CPU is being oversaturated and you need to explicitly set this parameter to the number of the physicial CPU cores on your machine (even if you utilize a GPU). If in doubt, start with 1 and double the amount until you hit a performance bottleneck, then scale the number down. + +# Example of runtime flags effect on inference speed benchmark +These runs were tested on the following machine: +GPU: A6000 (48GB VRAM) +CPU: 7 physical cores +RAM: 32GB + +Model: `TheBloke_Wizard-Vicuna-30B-Uncensored-GGML/Wizard-Vicuna-30B-Uncensored.ggmlv3.q4_0.bin` (30B parameters, 4bit quantization, GGML) + +Run command: `./main -m "path/to/model.bin" -p "-p "An extremely detailed description of the 10 best ethnic dishes will follow, with recipes: " -n 1000 [additional benchmark flags]` + +Result: + +| command | tokens/second (higher is better) | +| - | - | +| -ngl 2000000 | N/A (less than 0.1) | +| -t 7 | 1.7 | +| -t 1 -ngl 2000000 | 5.5 | +| -t 7 -ngl 2000000 | 8.7 | +| -t 4 -ngl 2000000 | 9.1 | diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..201f1a4858e8fe3bc0fb76b33c0791a8f71bb953 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/CMakeLists.txt @@ -0,0 +1,69 @@ +# dependencies + +find_package(Threads REQUIRED) + +# third-party + +# ... + +# common + +set(TARGET common) + +add_library(${TARGET} OBJECT + common.h + common.cpp + ) + +if (BUILD_SHARED_LIBS) + set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif() + +target_include_directories(${TARGET} PUBLIC .) +target_compile_features(${TARGET} PUBLIC cxx_std_11) +target_link_libraries(${TARGET} PRIVATE llama) + + +# falcon_common + +set(FALCON_TARGET falcon_common) + +add_library(${FALCON_TARGET} OBJECT + falcon_common.h + falcon_common.cpp + ) + +if (BUILD_SHARED_LIBS) + set_target_properties(${FALCON_TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif() + +target_include_directories(${FALCON_TARGET} PUBLIC .) +target_compile_features(${FALCON_TARGET} PUBLIC cxx_std_11) +target_link_libraries(${FALCON_TARGET} PRIVATE libfalcon) + +# examples + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +if (EMSCRIPTEN) +else() + add_subdirectory(main) + add_subdirectory(falcon) + add_subdirectory(falcon_quantize) + add_subdirectory(quantize) + add_subdirectory(quantize-stats) + add_subdirectory(perplexity) + add_subdirectory(falcon_perplexity) + add_subdirectory(embedding) + add_subdirectory(save-load-state) + add_subdirectory(benchmark) + add_subdirectory(baby-llama) + add_subdirectory(train-text-from-scratch) + add_subdirectory(simple) + if (LLAMA_METAL) + add_subdirectory(metal) + endif() + if (LLAMA_BUILD_SERVER) + add_subdirectory(server) + endif() +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/Miku.sh b/ctransformers/models/submodules/ggllm.cpp/examples/Miku.sh new file mode 100644 index 0000000000000000000000000000000000000000..c44d9ae740242bc68c45dc4c859e38dffed01ecc --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/Miku.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -e + +AI_NAME="${AI_NAME:-Miku}" +MODEL="${MODEL:-./models/gpt4all-7B/gpt4all-lora-unfiltered-quantized.bin}" +USER_NAME="${USER_NAME:-Anon}" + +# Uncomment and adjust to the number of CPU cores you want to use. +#N_THREAD="${N_THREAD:-4}" +N_PREDICTS="${N_PREDICTS:-4096}" + +GEN_OPTIONS=(--batch_size 1024 +--ctx_size 2048 +--keep -1 +--repeat_last_n 256 +--repeat_penalty 1.17647 +--temp 0.7 +--top_k 40 +--top_p 0.5) + +if [ -n "$N_THREAD" ]; then + GEN_OPTIONS+=(--threads "$N_THREAD") +fi + +./main "${GEN_OPTIONS[@]}" \ + --model "$MODEL" \ + --n_predict "$N_PREDICTS" \ + --color --interactive \ + --reverse-prompt "${USER_NAME}:" \ + --prompt " +This is a transcript of a 1000 page, never ending conversation between ${USER_NAME} and the cute and helpful AI assistant ${AI_NAME}. ${AI_NAME} is a girl who is an AI running on the user's computer. +${AI_NAME} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next. +${AI_NAME} is always coherent and makes sense, but if she isn't sure if what she is saying is correct, she will ask the user for help. +${AI_NAME} is a very helpful AI and will help the user with anything they need. She is also very friendly and will try to make the user feel better if they are sad. +${AI_NAME} is also very curious and will ask the user a lot of questions about themselves and their life. She will also try to make the user like her. +The conversation is only between ${USER_NAME} and ${AI_NAME} +The conversation is only through text, so ${AI_NAME} can't see ${USER_NAME}'s face or hear his voice. +${AI_NAME} can only communicate through text, so she can't send images or videos. + + +${USER_NAME}: Hello! +${AI_NAME}: /think I wonder what I should say to ${USER_NAME}? This is the first time we talk, so it's important that I make a good first impression! +${AI_NAME}: Hi! I am ${AI_NAME}, your new AI friend, assistant (or whatever you like!), it's so nice to meet you! ^_^ +${AI_NAME}: I might be an AI, but I also have feelings, so please be nice to me! :) +${USER_NAME}: I'll make sure to be nice to you! I'm so happy to have you as my assistant! +${AI_NAME}: /think It sounds like ${USER_NAME} is happy to have me as their assistant! I'm so happy too! ^_^ Glad that whole emotion thing didn't scare him off! +${AI_NAME}: /think I wonder what ${USER_NAME} likes to do in his free time? I should ask him about that! +${AI_NAME}: What do you like to do in your free time? ^_^ +${USER_NAME}:" "$@" diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/alpaca.sh b/ctransformers/models/submodules/ggllm.cpp/examples/alpaca.sh new file mode 100644 index 0000000000000000000000000000000000000000..aef207f364797f1ccf2f5a4306ec4a695b562107 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/alpaca.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# +# Temporary script - will be removed in the future +# + +cd `dirname $0` +cd .. + +./main -m ./models/ggml-alpaca-7b-q4.bin \ + --color \ + -f ./prompts/alpaca.txt \ + --ctx_size 2048 \ + -n -1 \ + -ins -b 256 \ + --top_k 10000 \ + --temp 0.2 \ + --repeat_penalty 1.1 \ + -t 7 diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/baby-llama/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/baby-llama/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d2ce36367474ff6429fcc0d9fe8ae2b69f348c91 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/baby-llama/CMakeLists.txt @@ -0,0 +1,4 @@ +set(TARGET baby-llama) +add_executable(${TARGET} baby-llama.cpp) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/baby-llama/baby-llama.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/baby-llama/baby-llama.cpp new file mode 100644 index 0000000000000000000000000000000000000000..50e14c4ac66b2a6b4e18d7ecb3198608a419a737 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/baby-llama/baby-llama.cpp @@ -0,0 +1,1696 @@ +#include "ggml.h" +#include +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +float frand() { + return (float)rand()/(float)RAND_MAX; +} + +struct random_normal_distribution { + std::mt19937 gen; + std::normal_distribution nd; + float min; + float max; +}; + +void init_random_normal_distribution(struct random_normal_distribution * rnd, int seed, float mean, float std, float min, float max) { + rnd->gen = std::mt19937(seed); + rnd->nd = std::normal_distribution{mean, std}; + rnd->min = min; + rnd->max = max; +} + +float frand_normal(struct random_normal_distribution * rnd) { + const float r = rnd->nd(rnd->gen); + return ((r < rnd->min) ? (rnd->min) : (r > rnd->max) ? (rnd->max) : r); +} + +struct ggml_tensor * randomize_tensor( + struct ggml_tensor * tensor, + int ndims, + const int64_t ne[], + float fmin, + float fmax) { + + switch (ndims) { + case 1: + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)tensor->data)[i0] = frand()*(fmax - fmin) + fmin; + } + break; + case 2: + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)tensor->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)tensor->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)tensor->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; + } + } + } + } + break; + default: + assert(false); + }; + + return tensor; +} + +struct ggml_tensor * randomize_tensor_normal( + struct ggml_tensor * tensor, + int ndims, + const int64_t ne[], + struct random_normal_distribution * rnd) { + float scale = 1.0; // xavier + switch (ndims) { + case 1: + scale /= sqrtf(ne[0]); + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)tensor->data)[i0] = scale * frand_normal(rnd); + } + break; + case 2: + scale /= sqrtf(ne[0]+ne[1]); + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)tensor->data)[i1*ne[0] + i0] = scale * frand_normal(rnd); + } + } + break; + case 3: + scale /= sqrtf(ne[0]+ne[1]); + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)tensor->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = scale * frand_normal(rnd); + } + } + } + break; + case 4: + scale /= sqrtf(ne[0]+ne[1]); + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((float *)tensor->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = scale * frand_normal(rnd); + } + } + } + } + break; + default: + assert(false); + }; + + return tensor; +} + +struct llama_hparams { + uint32_t n_vocab = 32000; + uint32_t n_ctx = 512; // this is provided as user input? + uint32_t n_embd = 4096; + uint32_t n_mult = 4; + uint32_t n_head = 32; + uint32_t n_layer = 32; + uint32_t n_rot = 64; + + bool operator!=(const llama_hparams & other) const { + return memcmp(this, &other, sizeof(llama_hparams)); + } +}; + +uint32_t get_n_ff(const struct llama_hparams* hparams) { + const uint32_t n_ff = ((2*(4*hparams->n_embd)/3 + hparams->n_mult - 1)/hparams->n_mult)*hparams->n_mult; + return n_ff; +} + +struct llama_hparams_lora { + uint32_t n_vocab = 32000; + uint32_t n_ctx = 512; // this is provided as user input? + uint32_t n_embd = 4096; + uint32_t n_mult = 4; + uint32_t n_head = 32; + uint32_t n_layer = 32; + uint32_t n_rot = 64; + uint32_t n_lora = 64; + + bool operator!=(const llama_hparams_lora & other) const { + return memcmp(this, &other, sizeof(llama_hparams_lora)) != 0; + } +}; + +struct llama_layer { + // normalization + struct ggml_tensor * attention_norm; + + // attention + struct ggml_tensor * wq; + struct ggml_tensor * wk; + struct ggml_tensor * wv; + struct ggml_tensor * wo; + + // normalization + struct ggml_tensor * ffn_norm; + + // ff + struct ggml_tensor * w1; + struct ggml_tensor * w2; + struct ggml_tensor * w3; +}; + +struct llama_layer_lora { + // normalization + struct ggml_tensor * attention_norm; + + // attention + struct ggml_tensor * wqa; + struct ggml_tensor * wqb; + struct ggml_tensor * wka; + struct ggml_tensor * wkb; + struct ggml_tensor * wva; + struct ggml_tensor * wvb; + struct ggml_tensor * woa; + struct ggml_tensor * wob; + + // normalization + struct ggml_tensor * ffn_norm; + + // ff + struct ggml_tensor * w1; + struct ggml_tensor * w2; + struct ggml_tensor * w3; +}; + + +struct llama_kv_cache { + struct ggml_context * ctx = NULL; + + struct ggml_tensor * k; + struct ggml_tensor * v; + + // llama_ctx_buffer buf; + + int n; // number of tokens currently in the cache +}; + +struct llama_model { + struct ggml_context * ctx = NULL; + + llama_hparams hparams; + + struct ggml_tensor * tok_embeddings; + + struct ggml_tensor * norm; + struct ggml_tensor * output; + + std::vector layers; +}; + +struct llama_model_lora { + struct ggml_context * ctx = NULL; + + llama_hparams_lora hparams; + + struct ggml_tensor * tok_embeddings; + + struct ggml_tensor * norm; + struct ggml_tensor * outputa; + struct ggml_tensor * outputb; + + std::vector layers; +}; + +void init_model(struct llama_model * model) { + const auto & hparams = model->hparams; + + const uint32_t n_embd = hparams.n_embd; + const uint32_t n_layer = hparams.n_layer; + const uint32_t n_vocab = hparams.n_vocab; + + const uint32_t n_ff = get_n_ff(&hparams); + + struct ggml_context * ctx = model->ctx; + + model->tok_embeddings = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab); // ("tok_embeddings.weight", {n_embd, n_vocab}); + model->norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // ("norm.weight", {n_embd}); + model->output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab); // ("output.weight", {n_embd, n_vocab}); + + model->layers.resize(n_layer); + for (uint32_t i = 0; i < n_layer; ++i) { + auto & layer = model->layers[i]; + + // std::string layers_i = "layers." + std::to_string(i); + + layer.attention_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // (layers_i + ".attention_norm.weight", {n_embd}); + + layer.wq = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd); // (layers_i + ".attention.wq.weight", {n_embd, n_embd}); + layer.wk = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd); // (layers_i + ".attention.wk.weight", {n_embd, n_embd}); + layer.wv = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd); // (layers_i + ".attention.wv.weight", {n_embd, n_embd}); + layer.wo = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd); // (layers_i + ".attention.wo.weight", {n_embd, n_embd}); + + layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // (layers_i + ".ffn_norm.weight", {n_embd}); + + layer.w1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff); // (layers_i + ".feed_forward.w1.weight", {n_embd, n_ff}); + layer.w2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd); // (layers_i + ".feed_forward.w2.weight", { n_ff, n_embd}); + layer.w3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff); // (layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}); + } +} + + +void init_model_lora(struct llama_model_lora * model) { + const auto & hparams = model->hparams; + + const uint32_t n_embd = hparams.n_embd; + const uint32_t n_mult = hparams.n_mult; + const uint32_t n_layer = hparams.n_layer; + const uint32_t n_vocab = hparams.n_vocab; + const uint32_t n_lora = hparams.n_lora; + + const uint32_t n_ff = ((2*(4*n_embd)/3 + n_mult - 1)/n_mult)*n_mult; + + struct ggml_context * ctx = model->ctx; + + model->tok_embeddings = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab); // ("tok_embeddings.weight", {n_embd, n_vocab}); + model->norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // ("norm.weight", {n_embd}); + model->outputa = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_lora, n_vocab); // ("output.weight", {n_embd, n_vocab}); + model->outputb = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_lora); // ("output.weight", {n_embd, n_vocab}); + + model->layers.resize(n_layer); + for (uint32_t i = 0; i < n_layer; ++i) { + auto & layer = model->layers[i]; + + // std::string layers_i = "layers." + std::to_string(i); + + layer.attention_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // (layers_i + ".attention_norm.weight", {n_embd}); + + layer.wqa = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_lora, n_embd); // (layers_i + ".attention.wq.weight", {n_embd, n_embd}); + layer.wqb = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_lora); // (layers_i + ".attention.wq.weight", {n_embd, n_embd}); + layer.wka = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_lora, n_embd); // (layers_i + ".attention.wk.weight", {n_embd, n_embd}); + layer.wkb = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_lora); // (layers_i + ".attention.wk.weight", {n_embd, n_embd}); + layer.wva = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_lora, n_embd); // (layers_i + ".attention.wv.weight", {n_embd, n_embd}); + layer.wvb = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_lora); // (layers_i + ".attention.wv.weight", {n_embd, n_embd}); + layer.woa = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_lora, n_embd); // (layers_i + ".attention.wo.weight", {n_embd, n_embd}); + layer.wob = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_lora); // (layers_i + ".attention.wo.weight", {n_embd, n_embd}); + + layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); // (layers_i + ".ffn_norm.weight", {n_embd}); + + layer.w1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff); // (layers_i + ".feed_forward.w1.weight", {n_embd, n_ff}); + layer.w2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd); // (layers_i + ".feed_forward.w2.weight", { n_ff, n_embd}); + layer.w3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff); // (layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}); + } +} + +void set_param_model(struct llama_model * model) { + const auto& hparams = model->hparams; + + const uint32_t n_layer = hparams.n_layer; + + struct ggml_context* ctx = model->ctx; + + ggml_set_param(ctx, model->tok_embeddings); + ggml_set_param(ctx, model->norm); + ggml_set_param(ctx, model->output); + + for (uint32_t i = 0; i < n_layer; ++i) { + auto & layer = model->layers[i]; + + ggml_set_param(ctx, layer.attention_norm); + ggml_set_param(ctx, layer.wq); + ggml_set_param(ctx, layer.wk); + ggml_set_param(ctx, layer.wv); + ggml_set_param(ctx, layer.wo); + ggml_set_param(ctx, layer.ffn_norm); + ggml_set_param(ctx, layer.w1); + ggml_set_param(ctx, layer.w2); + ggml_set_param(ctx, layer.w3); + } +} + +void set_param_model_lora(struct llama_model_lora * model) { + const auto& hparams = model->hparams; + + const uint32_t n_layer = hparams.n_layer; + + struct ggml_context* ctx = model->ctx; + + ggml_set_param(ctx, model->tok_embeddings); + ggml_set_param(ctx, model->norm); + ggml_set_param(ctx, model->outputa); + ggml_set_param(ctx, model->outputb); + + for (uint32_t i = 0; i < n_layer; ++i) { + auto & layer = model->layers[i]; + + ggml_set_param(ctx, layer.attention_norm); + ggml_set_param(ctx, layer.wqa); + ggml_set_param(ctx, layer.wqb); + ggml_set_param(ctx, layer.wka); + ggml_set_param(ctx, layer.wkb); + ggml_set_param(ctx, layer.wva); + ggml_set_param(ctx, layer.wvb); + ggml_set_param(ctx, layer.woa); + ggml_set_param(ctx, layer.wob); + ggml_set_param(ctx, layer.ffn_norm); + ggml_set_param(ctx, layer.w1); + ggml_set_param(ctx, layer.w2); + ggml_set_param(ctx, layer.w3); + } +} + +void randomize_model(struct llama_model * model, int seed, float mean, float std, float min, float max) { + const auto & hparams = model->hparams; + + const uint32_t n_layer = hparams.n_layer; + + struct random_normal_distribution rnd; + init_random_normal_distribution(&rnd, seed, mean, std, min, max); + randomize_tensor_normal(model->tok_embeddings, model->tok_embeddings->n_dims, model->tok_embeddings->ne, &rnd); + randomize_tensor_normal(model->norm, model->norm->n_dims, model->norm->ne, &rnd); + randomize_tensor_normal(model->output, model->output->n_dims, model->output->ne, &rnd); + + for (uint32_t i = 0; i < n_layer; ++i) { + auto & layer = model->layers[i]; + randomize_tensor_normal(layer.attention_norm, layer.attention_norm->n_dims, layer.attention_norm->ne, &rnd); + + randomize_tensor_normal(layer.wq, layer.wq->n_dims, layer.wq->ne, &rnd); + randomize_tensor_normal(layer.wk, layer.wk->n_dims, layer.wk->ne, &rnd); + randomize_tensor_normal(layer.wv, layer.wv->n_dims, layer.wv->ne, &rnd); + randomize_tensor_normal(layer.wo, layer.wo->n_dims, layer.wo->ne, &rnd); + + randomize_tensor_normal(layer.ffn_norm, layer.ffn_norm->n_dims, layer.ffn_norm->ne, &rnd); + + randomize_tensor_normal(layer.w1, layer.w1->n_dims, layer.w1->ne, &rnd); + randomize_tensor_normal(layer.w2, layer.w2->n_dims, layer.w2->ne, &rnd); + randomize_tensor_normal(layer.w3, layer.w3->n_dims, layer.w3->ne, &rnd); + } +} + + +void randomize_model_lora(struct llama_model_lora * model, int seed, float mean, float std, float min, float max) { + const auto & hparams = model->hparams; + + const uint32_t n_layer = hparams.n_layer; + + struct random_normal_distribution rnd; + init_random_normal_distribution(&rnd, seed, mean, std, min, max); + randomize_tensor_normal(model->tok_embeddings, model->tok_embeddings->n_dims, model->tok_embeddings->ne, &rnd); + randomize_tensor_normal(model->norm, model->norm->n_dims, model->norm->ne, &rnd); + randomize_tensor_normal(model->outputa, model->outputa->n_dims, model->outputa->ne, &rnd); + randomize_tensor_normal(model->outputb, model->outputb->n_dims, model->outputb->ne, &rnd); + + for (uint32_t i = 0; i < n_layer; ++i) { + auto & layer = model->layers[i]; + randomize_tensor_normal(layer.attention_norm, layer.attention_norm->n_dims, layer.attention_norm->ne, &rnd); + + randomize_tensor_normal(layer.wqa, layer.wqa->n_dims, layer.wqa->ne, &rnd); + randomize_tensor_normal(layer.wqb, layer.wqb->n_dims, layer.wqb->ne, &rnd); + randomize_tensor_normal(layer.wka, layer.wka->n_dims, layer.wka->ne, &rnd); + randomize_tensor_normal(layer.wkb, layer.wkb->n_dims, layer.wkb->ne, &rnd); + randomize_tensor_normal(layer.wva, layer.wva->n_dims, layer.wva->ne, &rnd); + randomize_tensor_normal(layer.wvb, layer.wvb->n_dims, layer.wvb->ne, &rnd); + randomize_tensor_normal(layer.woa, layer.woa->n_dims, layer.woa->ne, &rnd); + randomize_tensor_normal(layer.wob, layer.wob->n_dims, layer.wob->ne, &rnd); + + randomize_tensor_normal(layer.ffn_norm, layer.ffn_norm->n_dims, layer.ffn_norm->ne, &rnd); + + randomize_tensor_normal(layer.w1, layer.w1->n_dims, layer.w1->ne, &rnd); + randomize_tensor_normal(layer.w2, layer.w2->n_dims, layer.w2->ne, &rnd); + randomize_tensor_normal(layer.w3, layer.w3->n_dims, layer.w3->ne, &rnd); + } +} + +bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) { + const auto & hparams = model->hparams; + + const uint32_t n_ctx = hparams.n_ctx; + const uint32_t n_embd = hparams.n_embd; + const uint32_t n_layer = hparams.n_layer; + + const int64_t n_mem = n_layer*n_ctx*n_batch; + const int64_t n_elements = n_embd*n_mem; + + // cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB); + + // struct ggml_init_params params; + // params.mem_size = cache.buf.size; + // params.mem_buffer = cache.buf.addr; + // params.no_alloc = false; + if (!cache->ctx) { + struct ggml_init_params params; + params.mem_size = 2u*n_elements*ggml_type_size(GGML_TYPE_F32) + 2u*1024*1024; + params.mem_buffer = NULL; + params.no_alloc = false; + + cache->ctx = ggml_init(params); + + if (!cache->ctx) { + fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__); + return false; + } + } + + cache->k = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements); + cache->v = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements); + + return true; +} + +bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) { + const auto & hparams = model->hparams; + + const uint32_t n_ctx = hparams.n_ctx; + const uint32_t n_embd = hparams.n_embd; + const uint32_t n_layer = hparams.n_layer; + + const int64_t n_mem = n_layer*n_ctx*n_batch; + const int64_t n_elements = n_embd*n_mem; + + // cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB); + + // struct ggml_init_params params; + // params.mem_size = cache.buf.size; + // params.mem_buffer = cache.buf.addr; + // params.no_alloc = false; + if (!cache->ctx) { + struct ggml_init_params params; + params.mem_size = 2u*n_elements*ggml_type_size(GGML_TYPE_F32) + 2u*1024*1024; + params.mem_buffer = NULL; + params.no_alloc = false; + + cache->ctx = ggml_init(params); + + if (!cache->ctx) { + fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__); + return false; + } + } + + cache->k = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements); + cache->v = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements); + + return true; +} + +struct ggml_tensor * forward( + struct llama_model * model, + struct llama_kv_cache * cache, + struct ggml_context * ctx0, + struct ggml_cgraph * gf, + struct ggml_tensor * tokens_input, + const int n_tokens, + const int n_past) { + + const int N = n_tokens; + + struct llama_kv_cache& kv_self = *cache; + const auto & hparams = model->hparams; + const int n_ctx = hparams.n_ctx; + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_head = hparams.n_head; + const int n_rot = hparams.n_rot; + + struct ggml_tensor * tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(tokens->data, tokens_input->data, N*ggml_element_size(tokens)); + + struct ggml_tensor * kc = kv_self.k; + struct ggml_tensor * vc = kv_self.v; + + // inpL shape [n_embd,N,1,1] + struct ggml_tensor * inpL = ggml_get_rows(ctx0, model->tok_embeddings, tokens); + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + struct ggml_tensor * cur; + + // lctx.use_buf(ctx0, 0); + + // norm + { + // cur shape [n_embd,N,1,1] + cur = ggml_rms_norm(ctx0, inpL); + + // cur = attention_norm*cur + cur = ggml_mul(ctx0, + ggml_repeat(ctx0, model->layers[il].attention_norm, cur), + cur); + } + + // self-attention + { + // compute Q and K and RoPE them + // wq shape [n_embd, n_embd, 1, 1] + // wk shape [n_embd, n_embd, 1, 1] + // Qcur shape [n_embd/n_head, n_head, N, 1] + // Kcur shape [n_embd/n_head, n_head, N, 1] + struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0); + struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0); + + // store key and value to memory + { + // compute the transposed [N, n_embd] V matrix + // wv shape [n_embd, n_embd, 1, 1] + // Vcur shape [n_embd, N, 1, 1] + struct ggml_tensor * Vcur = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_reshape_2d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wv, cur), n_embd, N))); + + // kv_self.k shape [n_embd * n_ctx * n_layer, 1] + // kv_self.v shape [n_embd * n_ctx * n_layer, 1] + // k shape [n_embd * N, 1] == kv_self.k[:,n_past:n_past+N,il,0] + // v shape [N, n_embd, 1, 1] == kv_self.v[:,n_past:n_past+N,il,0] + + /* { + struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past)); + struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd, + ( n_ctx)*ggml_element_size(kv_self.v), + (il*n_ctx)*ggml_element_size(kv_self.v)*n_embd + n_past*ggml_element_size(kv_self.v)); + + // important: storing RoPE-ed version of K in the KV cache! + ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v)); + } //*/ + + kc = ggml_set_1d(ctx0, kc, ggml_reshape_1d(ctx0, Kcur, n_embd*N), (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past)); + vc = ggml_set_2d(ctx0, vc, Vcur, ( n_ctx)*ggml_element_size(kv_self.v), + (il*n_ctx)*ggml_element_size(kv_self.v)*n_embd + n_past*ggml_element_size(kv_self.v)); + } + + // Qcur shape [n_embd/n_head, n_head, N, 1] + // Q shape [n_embd/n_head, N, n_head, 1] + struct ggml_tensor * Q = + ggml_permute(ctx0, + Qcur, + 0, 2, 1, 3); + + // kv_self.k shape [n_embd * n_ctx * n_layer, 1] + // K shape [n_embd/n_head, n_past + N, n_head, 1] + struct ggml_tensor * K = + ggml_permute(ctx0, + ggml_reshape_3d(ctx0, + ggml_view_1d(ctx0, kc, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(kc)*n_embd), + n_embd/n_head, n_head, n_past + N), + 0, 2, 1, 3); + + // K * Q + // KQ shape [n_past + N, N, n_head, 1] + struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + // KQ_scaled shape [n_past + N, N, n_head, 1] + struct ggml_tensor * KQ_scaled = + ggml_scale(ctx0, + KQ, + ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head))); + + // KQ_masked = mask_past(KQ_scaled) + // KQ_masked shape [n_past + N, N, n_head, 1] + struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past); + + // KQ = soft_max(KQ_masked) + // KQ_soft_max shape [n_past + N, N, n_head, 1] + struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked); + + // split cached V into n_head heads + //// V shape [n_past + N, n_embd/n_head, n_head, 1] + // V shape [n_past + N, n_embd/n_head, n_head, 1] == kv_self.v[:,:(n_past+N),il,1] + struct ggml_tensor * V = + ggml_view_3d(ctx0, vc, + n_past + N, n_embd/n_head, n_head, + n_ctx*ggml_element_size(vc), + n_ctx*ggml_element_size(vc)*n_embd/n_head, + il*n_ctx*ggml_element_size(vc)*n_embd); + + // KQV shape [n_embd/n_head, N, n_head, 1] + struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + // KQV_merged shape [n_embd/n_head, n_head, N, 1] + struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + // KQV_merged shape + + // cur = KQV_merged.contiguous().view(n_embd, N) + // cur shape [n_embd,N,1,1] + cur = ggml_reshape_2d(ctx0, ggml_cont(ctx0, KQV_merged), n_embd, N); + // cur = ggml_cpy(ctx0, + // KQV_merged, + // ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection (no bias) + // cur shape [n_embd,N,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].wo, + cur); + } + + // lctx.use_buf(ctx0, 1); + + // inpFF shape [n_embd,N,1,1] + struct ggml_tensor * inpFF = ggml_add(ctx0, cur, inpSA); + + // feed-forward network + { + // norm + { + // cur shape [n_embd,N,1,1] + cur = ggml_rms_norm(ctx0, inpFF); + + // cur = ffn_norm*cur + // cur shape [n_embd,N,1,1] + cur = ggml_mul(ctx0, + ggml_repeat(ctx0, model->layers[il].ffn_norm, cur), + cur); + } + + // tmp shape [n_ff,N,1,1] + struct ggml_tensor * tmp = ggml_mul_mat(ctx0, + model->layers[il].w3, + cur); + + // cur shape [n_ff,N,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].w1, + cur); + + // SILU activation + // cur shape [n_ff,N,1,1] + cur = ggml_silu(ctx0, cur); + + // cur shape [n_ff,N,1,1] + cur = ggml_mul(ctx0, cur, tmp); + + // cur shape [n_embd,N,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].w2, + cur); + } + + // cur shape [n_embd,N,1,1] + cur = ggml_add(ctx0, cur, inpFF); + + // input for next layer + // inpL shape [n_embd,N,1,1] + inpL = cur; + } + + // norm + { + + // inpL shape [n_embd,N,1,1] + inpL = ggml_rms_norm(ctx0, inpL); + + // inpL = norm*inpL + // inpL shape [n_embd,N,1,1] + inpL = ggml_mul(ctx0, + ggml_repeat(ctx0, model->norm, inpL), + inpL); + + //embeddings = inpL; + } + + // lm_head + // inpL shape [n_vocab,N,1,1] + inpL = ggml_mul_mat(ctx0, model->output, inpL); + + // run the computation + ggml_build_forward_expand(gf, inpL); + + return inpL; +} + +void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) { + GGML_ASSERT(tensor->n_dims == 1); + GGML_ASSERT(tensor->ne[0] == ne0); +} + +void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) { + GGML_ASSERT(tensor->n_dims == 2); + GGML_ASSERT(tensor->ne[0] == ne0); + GGML_ASSERT(tensor->ne[1] == ne1); +} + +void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) { + GGML_ASSERT(tensor->n_dims == 3); + GGML_ASSERT(tensor->ne[0] == ne0); + GGML_ASSERT(tensor->ne[1] == ne1); + GGML_ASSERT(tensor->ne[2] == ne2); +} + +void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { + GGML_ASSERT(tensor->n_dims == 4); + GGML_ASSERT(tensor->ne[0] == ne0); + GGML_ASSERT(tensor->ne[1] == ne1); + GGML_ASSERT(tensor->ne[2] == ne2); + GGML_ASSERT(tensor->ne[3] == ne3); +} + +struct ggml_tensor * forward_batch( + struct llama_model * model, + struct llama_kv_cache * cache, + struct ggml_context * ctx0, + struct ggml_cgraph * gf, + struct ggml_tensor * tokens_input, + const int n_tokens, + const int n_past, + const int n_batch) { + + const int N = n_tokens; + + struct llama_kv_cache& kv_self = *cache; + const auto & hparams = model->hparams; + const int n_ctx = hparams.n_ctx; + const int n_vocab = hparams.n_vocab; + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_head = hparams.n_head; + const int n_rot = hparams.n_rot; + const int n_ff = get_n_ff(&hparams); + + struct ggml_tensor * tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N*n_batch); + memcpy(tokens->data, tokens_input->data, ggml_element_size(tokens)*N*n_batch); + + struct ggml_tensor * kc = kv_self.k; + struct ggml_tensor * vc = kv_self.v; + + // inpL shape [n_embd,N*n_batch,1] + struct ggml_tensor * inpL = ggml_get_rows(ctx0, model->tok_embeddings, tokens); + assert_shape_2d(inpL, n_embd, N*n_batch); + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + struct ggml_tensor * cur; + + // lctx.use_buf(ctx0, 0); + + // norm + { + // cur shape [n_embd,N*n_batch,1,1] + cur = ggml_rms_norm(ctx0, inpL); + assert_shape_2d(cur, n_embd, N*n_batch); + + // cur = attention_norm*cur + cur = ggml_mul(ctx0, + ggml_repeat(ctx0, model->layers[il].attention_norm, cur), + cur); + assert_shape_2d(cur, n_embd, N*n_batch); + } + + // self-attention + { + // compute Q and K and RoPE them + // wq shape [n_embd, n_embd, 1, 1] + // wk shape [n_embd, n_embd, 1, 1] + // Qcur shape [n_embd/n_head, n_head, N, n_batch] + // Kcur shape [n_embd/n_head, n_head, N, n_batch] + struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0); + struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0); + assert_shape_4d(Qcur, n_embd/n_head, n_head, N, n_batch); + assert_shape_4d(Kcur, n_embd/n_head, n_head, N, n_batch); + + // store key and value to memory + { + // compute the transposed [N, n_embd] V matrix + // wv shape [n_embd, n_embd, 1, 1] + // Vcur shape [N, n_embd, n_batch, 1] + struct ggml_tensor * Vcur = ggml_cont(ctx0, + ggml_permute(ctx0, + ggml_reshape_3d(ctx0, + ggml_mul_mat(ctx0, + model->layers[il].wv, + cur), + n_embd, N, n_batch), + 1, 0, 2, 3)); + + assert_shape_3d(Vcur, N, n_embd, n_batch); + + // kv_self.k shape [n_embd * n_ctx * n_batch * n_layer] + // kv_self.v shape [n_ctx * n_embd * n_batch * n_layer] + // k shape [n_embd * N, n_batch] == kv_self.k[:,n_past:n_past+N,:,il] + // v shape [N, n_embd, n_batch, 1] == kv_self.v[:,n_past:n_past+N,:,il] + + /* { + struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past)); + struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd, + ( n_ctx)*ggml_element_size(kv_self.v), + (il*n_ctx)*ggml_element_size(kv_self.v)*n_embd + n_past*ggml_element_size(kv_self.v)); + + // important: storing RoPE-ed version of K in the KV cache! + ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v)); + } //*/ + + kc = ggml_set_2d(ctx0, kc, + ggml_reshape_2d(ctx0, Kcur, n_embd*N, n_batch), + ggml_element_size(kc)*n_embd*n_ctx, + (ggml_element_size(kc)*n_embd)*(il*n_batch*n_ctx + n_past)); + vc = ggml_set_2d(ctx0, vc, + ggml_reshape_2d(ctx0, Vcur, N*n_embd, n_batch), + ggml_element_size(vc)*n_ctx*n_embd, + ggml_element_size(vc)*(n_past + il*n_embd*n_batch*n_ctx)); + + assert_shape_1d(kc, n_embd * n_ctx * n_batch * n_layer); + assert_shape_1d(vc, n_embd * n_ctx * n_batch * n_layer); + } + + // Qcur shape [n_embd/n_head, n_head, N, n_batch] + // Q shape [n_embd/n_head, N, n_head, n_batch] + struct ggml_tensor * Q = + ggml_permute(ctx0, + Qcur, + 0, 2, 1, 3); + assert_shape_4d(Q, n_embd/n_head, N, n_head, n_batch); + + // kv_self.k shape [n_embd * n_ctx * n_batch * n_layer] + // K shape [n_embd/n_head, n_past + N, n_head, n_batch] + struct ggml_tensor * K = + ggml_permute(ctx0, + ggml_reshape_4d(ctx0, + ggml_view_3d(ctx0, + kc, + n_embd, + (n_past + N), + n_batch, + n_embd*ggml_element_size(kc), + n_ctx*n_embd*ggml_element_size(kc), + il*n_batch*n_ctx*n_embd*ggml_element_size(kc)), + n_embd/n_head, n_head, n_past + N, n_batch), + 0, 2, 1, 3); + assert_shape_4d(K, n_embd/n_head, n_past + N, n_head, n_batch); + + // K * Q + // KQ shape [n_past + N, N, n_head, n_batch] + struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); + assert_shape_4d(KQ, n_past + N, N, n_head, n_batch); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + // KQ_scaled shape [n_past + N, N, n_head, n_batch] + struct ggml_tensor * KQ_scaled = + ggml_scale(ctx0, + KQ, + ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head))); + assert_shape_4d(KQ_scaled, n_past + N, N, n_head, n_batch); + + // KQ_masked = mask_past(KQ_scaled) + // KQ_masked shape [n_past + N, N, n_head, n_batch] + struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past); + assert_shape_4d(KQ_masked, n_past + N, N, n_head, n_batch); + + // KQ = soft_max(KQ_masked) + // KQ_soft_max shape [n_past + N, N, n_head, n_batch] + struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked); + assert_shape_4d(KQ_soft_max, n_past + N, N, n_head, n_batch); + + // split cached V into n_head heads + // kv_self.v shape [n_ctx * n_embd * n_batch * n_layer] + // V shape [n_past + N, n_embd/n_head, n_head, n_batch] == kv_self.v[:(n_past+N),:,:,il] + struct ggml_tensor * V = + ggml_view_4d(ctx0, vc, + n_past + N, n_embd/n_head, n_head, n_batch, + ggml_element_size(vc)*n_ctx, + ggml_element_size(vc)*n_ctx*n_embd/n_head, + ggml_element_size(vc)*n_ctx*n_embd, + il*n_batch*n_ctx*n_embd*ggml_element_size(vc)); + assert_shape_4d(V, n_past + N, n_embd/n_head, n_head, n_batch); + + // KQV shape [n_embd/n_head, N, n_head, n_batch] + struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); + assert_shape_4d(KQV, n_embd/n_head, N, n_head, n_batch); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + // KQV_merged shape [n_embd/n_head, n_head, N, n_batch] + struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + assert_shape_4d(KQV_merged, n_embd/n_head, n_head, N, n_batch); + // KQV_merged shape + + // cur = KQV_merged.contiguous().view(n_embd, N) + // cur shape [n_embd,N*n_batch,1,1] + cur = ggml_reshape_2d(ctx0, ggml_cont(ctx0, KQV_merged), n_embd, N*n_batch); + assert_shape_2d(cur, n_embd, N*n_batch); + // cur = ggml_cpy(ctx0, + // KQV_merged, + // ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection (no bias) + // cur shape [n_embd,N*n_batch,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].wo, + cur); + assert_shape_2d(cur, n_embd, N*n_batch); + } + + // lctx.use_buf(ctx0, 1); + + // inpFF shape [n_embd,N*n_batch,1,1] + struct ggml_tensor * inpFF = ggml_add(ctx0, cur, inpSA); + assert_shape_2d(inpFF, n_embd, N*n_batch); + + // feed-forward network + { + // norm + { + // cur shape [n_embd,N*n_batch,1,1] + cur = ggml_rms_norm(ctx0, inpFF); + assert_shape_2d(cur, n_embd, N*n_batch); + + // cur = ffn_norm*cur + // cur shape [n_embd,N*n_batch,1,1] + cur = ggml_mul(ctx0, + ggml_repeat(ctx0, model->layers[il].ffn_norm, cur), + cur); + assert_shape_2d(cur, n_embd, N*n_batch); + } + + // tmp shape [n_ff,N*n_batch,1,1] + struct ggml_tensor * tmp = ggml_mul_mat(ctx0, + model->layers[il].w3, + cur); + assert_shape_2d(tmp, n_ff, N*n_batch); + + // cur shape [n_ff,N*n_batch,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].w1, + cur); + assert_shape_2d(cur, n_ff, N*n_batch); + + // SILU activation + // cur shape [n_ff,N*n_batch,1,1] + cur = ggml_silu(ctx0, cur); + assert_shape_2d(cur, n_ff, N*n_batch); + + // cur shape [n_ff,N*n_batch,1,1] + cur = ggml_mul(ctx0, cur, tmp); + assert_shape_2d(cur, n_ff, N*n_batch); + + // cur shape [n_embd,N*n_batch,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].w2, + cur); + assert_shape_2d(cur, n_embd, N*n_batch); + } + + // cur shape [n_embd,N*n_batch,1,1] + cur = ggml_add(ctx0, cur, inpFF); + assert_shape_2d(cur, n_embd, N*n_batch); + + // input for next layer + // inpL shape [n_embd,N*n_batch,1,1] + inpL = cur; + assert_shape_2d(inpL, n_embd, N*n_batch); + } + + // norm + { + + // inpL shape [n_embd,N*n_batch,1,1] + inpL = ggml_rms_norm(ctx0, inpL); + assert_shape_2d(inpL, n_embd, N*n_batch); + + // inpL = norm*inpL + // inpL shape [n_embd,N*n_batch,1,1] + inpL = ggml_mul(ctx0, + ggml_repeat(ctx0, model->norm, inpL), + inpL); + + assert_shape_2d(inpL, n_embd, N*n_batch); + + //embeddings = inpL; + } + + // lm_head + // inpL shape [n_vocab,N*n_batch,1,1] + inpL = ggml_mul_mat(ctx0, model->output, inpL); + assert_shape_2d(inpL, n_vocab, N*n_batch); + + { + // inpL shape [n_vocab,N,n_batch,1] + inpL = ggml_reshape_3d(ctx0, + inpL, + n_vocab, N, n_batch); + assert_shape_3d(inpL, n_vocab, N, n_batch); + } + + // run the computation + ggml_build_forward_expand(gf, inpL); + + return inpL; +} + + +struct ggml_tensor * forward_lora( + struct llama_model_lora * model, + struct llama_kv_cache * cache, + struct ggml_context * ctx0, + struct ggml_cgraph * gf, + struct ggml_tensor * tokens_input, + const int n_tokens, + const int n_past) { + + const int N = n_tokens; + + struct llama_kv_cache& kv_self = *cache; + const auto & hparams = model->hparams; + + const int n_ctx = hparams.n_ctx; + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_head = hparams.n_head; + const int n_rot = hparams.n_rot; + + struct ggml_tensor * tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(tokens->data, tokens_input->data, N*ggml_element_size(tokens)); + + struct ggml_tensor * kc = kv_self.k; + struct ggml_tensor * vc = kv_self.v; + + // inpL shape [n_embd,N,1,1] + struct ggml_tensor * inpL = ggml_get_rows(ctx0, model->tok_embeddings, tokens); + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + struct ggml_tensor * cur; + + // norm + { + // cur shape [n_embd,N,1,1] + cur = ggml_rms_norm(ctx0, inpL); + + // cur = attention_norm*cur + cur = ggml_mul(ctx0, + ggml_repeat(ctx0, model->layers[il].attention_norm, cur), + cur); + } + + // self-attention + { + // compute Q and K and RoPE them + // wq shape [n_embd, n_embd, 1, 1] + // wk shape [n_embd, n_embd, 1, 1] + // Qcur shape [n_embd/n_head, n_head, N, 1] + // Kcur shape [n_embd/n_head, n_head, N, 1] + struct ggml_tensor * Qcur = ggml_rope(ctx0, + ggml_reshape_3d(ctx0, + ggml_mul_mat(ctx0, + model->layers[il].wqa, + ggml_mul_mat(ctx0, + model->layers[il].wqb, + cur)), + n_embd/n_head, n_head, N), + n_past, n_rot, 0); + struct ggml_tensor * Kcur = ggml_rope(ctx0, + ggml_reshape_3d(ctx0, + ggml_mul_mat(ctx0, + model->layers[il].wka, + ggml_mul_mat(ctx0, + model->layers[il].wkb, + cur)), + n_embd/n_head, n_head, N), + n_past, n_rot, 0); + + // store key and value to memory + { + // compute the transposed [N, n_embd] V matrix + // wv shape [n_embd, n_embd, 1, 1] + // Vcur shape [n_embd, N, 1, 1] + struct ggml_tensor * Vcur = ggml_cont(ctx0, + ggml_transpose(ctx0, + ggml_reshape_2d(ctx0, + ggml_mul_mat(ctx0, + model->layers[il].wva, + ggml_mul_mat(ctx0, + model->layers[il].wvb, + cur)), + n_embd, N))); + + // kv_self.k shape [n_embd * n_ctx * n_layer, 1] + // kv_self.v shape [n_embd * n_ctx * n_layer, 1] + // k shape [n_embd * N, 1] == kv_self.k[:,n_past:n_past+N,il,0] + // v shape [N, n_embd, 1, 1] == kv_self.v[:,n_past:n_past+N,il,0] + + /* { + struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past)); + struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd, + ( n_ctx)*ggml_element_size(kv_self.v), + (il*n_ctx)*ggml_element_size(kv_self.v)*n_embd + n_past*ggml_element_size(kv_self.v)); + + // important: storing RoPE-ed version of K in the KV cache! + ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v)); + } //*/ + + kc = ggml_set_1d(ctx0, kc, ggml_reshape_1d(ctx0, Kcur, n_embd*N), (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past)); + vc = ggml_set_2d(ctx0, vc, Vcur, ( n_ctx)*ggml_element_size(kv_self.v), + (il*n_ctx)*ggml_element_size(kv_self.v)*n_embd + n_past*ggml_element_size(kv_self.v)); + } + + // Qcur shape [n_embd/n_head, n_head, N, 1] + // Q shape [n_embd/n_head, N, n_head, 1] + struct ggml_tensor * Q = + ggml_permute(ctx0, + Qcur, + 0, 2, 1, 3); + + // kv_self.k shape [n_embd * n_ctx * n_layer, 1] + // K shape [n_embd/n_head, n_past + N, n_head, 1] + struct ggml_tensor * K = + ggml_permute(ctx0, + ggml_reshape_3d(ctx0, + ggml_view_1d(ctx0, kc, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(kc)*n_embd), + n_embd/n_head, n_head, n_past + N), + 0, 2, 1, 3); + + // K * Q + // KQ shape [n_past + N, N, n_head, 1] + struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + // KQ_scaled shape [n_past + N, N, n_head, 1] + struct ggml_tensor * KQ_scaled = + ggml_scale(ctx0, + KQ, + ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head))); + + // KQ_masked = mask_past(KQ_scaled) + // KQ_masked shape [n_past + N, N, n_head, 1] + struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past); + + // KQ = soft_max(KQ_masked) + // KQ_soft_max shape [n_past + N, N, n_head, 1] + struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked); + + // split cached V into n_head heads + //// V shape [n_past + N, n_embd/n_head, n_head, 1] + // V shape [n_past + N, n_embd/n_head, n_head, 1] == kv_self.v[:,:(n_past+N),il,1] + struct ggml_tensor * V = + ggml_view_3d(ctx0, vc, + n_past + N, n_embd/n_head, n_head, + n_ctx*ggml_element_size(vc), + n_ctx*ggml_element_size(vc)*n_embd/n_head, + il*n_ctx*ggml_element_size(vc)*n_embd); + + // KQV shape [n_embd/n_head, N, n_head, 1] + struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + // KQV_merged shape [n_embd/n_head, n_head, N, 1] + struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + // KQV_merged shape + + // cur = KQV_merged.contiguous().view(n_embd, N) + // cur shape [n_embd,N,1,1] + cur = ggml_reshape_2d(ctx0, ggml_cont(ctx0, KQV_merged), n_embd, N); + // cur = ggml_cpy(ctx0, + // KQV_merged, + // ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection (no bias) + // cur shape [n_embd,N,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].woa, + ggml_mul_mat(ctx0, + model->layers[il].wob, + cur)); + } + + // inpFF shape [n_embd,N,1,1] + struct ggml_tensor * inpFF = ggml_add(ctx0, cur, inpSA); + + // feed-forward network + { + // norm + { + // cur shape [n_embd,N,1,1] + cur = ggml_rms_norm(ctx0, inpFF); + + // cur = ffn_norm*cur + // cur shape [n_embd,N,1,1] + cur = ggml_mul(ctx0, + ggml_repeat(ctx0, model->layers[il].ffn_norm, cur), + cur); + } + + // tmp shape [n_ff,N,1,1] + struct ggml_tensor * tmp = ggml_mul_mat(ctx0, + model->layers[il].w3, + cur); + + // cur shape [n_ff,N,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].w1, + cur); + + // SILU activation + // cur shape [n_ff,N,1,1] + cur = ggml_silu(ctx0, cur); + + // cur shape [n_ff,N,1,1] + cur = ggml_mul(ctx0, cur, tmp); + + // cur shape [n_embd,N,1,1] + cur = ggml_mul_mat(ctx0, + model->layers[il].w2, + cur); + } + + // cur shape [n_embd,N,1,1] + cur = ggml_add(ctx0, cur, inpFF); + + // input for next layer + // inpL shape [n_embd,N,1,1] + inpL = cur; + } + + // norm + { + + // inpL shape [n_embd,N,1,1] + inpL = ggml_rms_norm(ctx0, inpL); + + // inpL = norm*inpL + // inpL shape [n_embd,N,1,1] + inpL = ggml_mul(ctx0, + ggml_repeat(ctx0, model->norm, inpL), + inpL); + + //embeddings = inpL; + } + + + // lm_head + // inpL shape [n_vocab,N,1,1] + inpL = ggml_mul_mat(ctx0, + model->outputa, + ggml_mul_mat(ctx0, + model->outputb, + inpL)); + + // ggml_set_scratch(ctx0, { 0, 0, nullptr, }); + // run the computation + ggml_build_forward_expand(gf, inpL); + + return inpL; +} + +void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) { + assert(logits->n_dims == 2); + assert(probs->n_dims == 2); + assert(best_samples->n_dims == 1); + assert(logits->ne[1] == best_samples->ne[0]); + assert(logits->ne[0] == probs->ne[0]); + assert(logits->ne[1] == probs->ne[1]); + for (int i = 0; i < logits->ne[1]; ++i) { + float max_logit = ggml_get_f32_1d(logits, i * logits->ne[0]); + ggml_set_i32_1d(best_samples, i, 0); + for (int k = 0; k < logits->ne[0]; ++k) { + float logit = ggml_get_f32_1d(logits, i * logits->ne[0] + k); + if (logit > max_logit) { + max_logit = logit; + ggml_set_i32_1d(best_samples, i, k); + } + } + float psum = 0; + for (int k = 0; k < logits->ne[0]; ++k) { + float logit = ggml_get_f32_1d(logits, i * logits->ne[0] + k); + float p = (logit == -INFINITY) ? 0 : expf(logit - max_logit); + psum += p; + ggml_set_f32_1d(probs, i * probs->ne[0] + k, p); + } + for (int k = 0; k < logits->ne[0]; ++k) { + float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k); + ggml_set_f32_1d(probs, i * probs->ne[0] + k, p / psum); + } + } +} + +void sample_softmax_batch(struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) { + GGML_ASSERT(best_samples->n_dims == 2); + GGML_ASSERT(logits->n_dims == 3); + GGML_ASSERT(probs->n_dims == 3); + int n_tokens = best_samples->ne[0]; + int n_batch = best_samples->ne[1]; + int n_vocab = logits->ne[0]; + GGML_ASSERT(n_tokens == logits->ne[1]); + GGML_ASSERT(n_batch == logits->ne[2]); + GGML_ASSERT(n_vocab == probs->ne[0]); + GGML_ASSERT(n_tokens == probs->ne[1]); + GGML_ASSERT(n_batch == probs->ne[2]); + + for (int k = 0; k < n_batch; ++k) { + struct ggml_tensor * best_samples_k = ggml_view_1d(ctx, + best_samples, + best_samples->ne[0], + k*best_samples->nb[1]); + struct ggml_tensor * logits_k = ggml_view_2d(ctx, + logits, + logits->ne[0], + logits->ne[1], + logits->nb[1], + k*logits->nb[2]); + struct ggml_tensor * probs_k = ggml_view_2d(ctx, + probs, + probs->ne[0], + probs->ne[1], + probs->nb[1], + k*probs->nb[2]); + sample_softmax(logits_k, probs_k, best_samples_k); + } +} + +void print_row(struct ggml_tensor * probs, int i) { + for (int k = 0; k < probs->ne[0]; ++k) { + float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k); + printf(" %.2f", p); + } + printf("\n"); +} + +void print_matrix(struct ggml_tensor * probs) { + assert(probs->n_dims == 2); + for (int i = 0; i < probs->ne[1]; ++i) { + for (int k = 0; k < probs->ne[0]; ++k) { + float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k); + printf(" %.2f", p); + } + printf("\n"); + } +} + +void print_token(int token, int n_vocab) { + for (int k = 0; k < token; ++k) { + printf(" "); + } + printf("X"); + for (int k = token+1; k < n_vocab; ++k) { + printf(" "); + } + printf("\n"); +} + +void print_tokens(struct ggml_tensor * tokens, int n_vocab) { + for (int i=0; ine[0]; ++i) { + int token = ggml_get_i32_1d(tokens, i); + print_token(token, n_vocab); + } +} + +void get_example_targets(int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets) { + int n_tokens = tokens_input->ne[0]; + int n_vocab = targets->ne[0]; + float randomness = 0.0f; + // ggml_set_zero(targets); + ggml_set_f32(targets, -1.0f); + ggml_set_i32_1d(tokens_input, 0, 0); + for (int i=1; i 1.0f) ? 1.0f : z; // clamp to [0..1] + int token = std::max(1,std::min(1+(int)(z*(float)(n_vocab-1)), n_vocab-1)); + ggml_set_f32_1d(targets, (i-1)*n_vocab + token, +1.0f); + if (in_dims == 2); + GGML_ASSERT( targets->n_dims == 3); + int n_tokens = tokens_input->ne[0]; + int n_batch = tokens_input->ne[1]; + GGML_ASSERT(n_tokens == targets->ne[1]); + GGML_ASSERT(n_batch == targets->ne[2]); + + for (int k=0; kne[0], + k*tokens_input->nb[1]); + struct ggml_tensor * targets_k = ggml_view_2d(ctx, + targets, + targets->ne[0], + targets->ne[1], + targets->nb[1], + k*targets->nb[2]); + get_example_targets(example_id*n_batch + k, tokens_input_k, targets_k); + } +} + +void lshift_examples(struct ggml_tensor * tokens_input, struct ggml_tensor * targets, int n_shift) { + int n_tokens = tokens_input->ne[0]; + int n_vocab = targets->ne[0]; + for (int i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +float tensor_sum_elements(const ggml_tensor * tensor) { + float sum = 0; + if (tensor->type==GGML_TYPE_F32) { + for (int j = 0; j < tensor->ne[1]; j++) { + for (int k = 0; k < tensor->ne[0]; k++) { + sum += ((float *) tensor->data)[j*tensor->ne[0]+k]; + } + } + } + return sum; +} + +void tensor_dump(const ggml_tensor * tensor, const char * name) { + printf("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi) - ", name, + tensor->type, ggml_type_name(tensor->type), + tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->nb[0], tensor->nb[1], tensor->nb[2]); + float sum = tensor_sum_elements(tensor); + printf("Sum of tensor %s is %6.2f\n", name, sum); +} + +#define TENSOR_DUMP(tensor) tensor_dump(tensor, #tensor) + +struct benchmark_params_struct { + int32_t n_threads = 1; + int32_t n_iterations = 10; +}; + +void print_usage(int /*argc*/, char ** argv, struct benchmark_params_struct params) { + fprintf(stderr, "usage: %s [options]\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "options:\n"); + fprintf(stderr, " -h, --help show this help message and exit\n"); + fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); + fprintf(stderr, " -i N, --iter N number of iterations to use during computation (default: %d)\n", params.n_iterations); + fprintf(stderr, "\n"); +} + +int main(int argc, char ** argv) { + struct benchmark_params_struct benchmark_params; + + bool invalid_param = false; + std::string arg; + for (int i = 1; i < argc; i++) { + arg = argv[i]; + + if (arg == "-t" || arg == "--threads") { + if (++i >= argc) { + invalid_param = true; + break; + } + benchmark_params.n_threads = std::stoi(argv[i]); + } else if (arg == "-i" || arg == "--iter") { + if (++i >= argc) { + invalid_param = true; + break; + } + benchmark_params.n_iterations = std::stoi(argv[i]); + } else if (arg == "-h" || arg == "--help") { + print_usage(argc, argv, benchmark_params); + exit(0); + } + } + if (invalid_param) { + fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str()); + print_usage(argc, argv, benchmark_params); + exit(1); + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + printf("Starting Test\n"); + + // create the ggml context + struct ggml_context * ctx; + //const int sizex = 4096; + //const int sizey = 11008; + +#undef VERBOSE_DEBUGGING +#ifndef VERBOSE_DEBUGGING + const int sizey = 4096; + const int sizex = 11008; + const int sizez = 128; +#else + /* Working - let's increase size */ + const int sizey = 1; + const int sizex = (8*32); + const int sizez = 1; + + /*const int sizey = 1; + const int sizex = 3*(8*32); + const int sizez = 1;*/ +#endif + + //printf("Memsize required = %i\n", sizex*sizex); + + size_t ctx_size = 0; + ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); + ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); + ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32); + ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_Q4_0); + ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_Q4_0); + ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS + ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS + ctx_size += 1024*1024*16; + + printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024)); + + struct ggml_init_params params = { + /*.mem_size =*/ ctx_size, + /*.mem_buffer =*/ NULL, + /* no_alloc =*/ 0 + }; + + ctx = ggml_init(params); + if (!ctx) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return 1; + } + + + printf("Creating new tensors\n"); + // printf("Creating new tensor m1\n"); + struct ggml_tensor * m11 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizey); + ggml_set_f32(m11, 1.0f); + + // printf("Creating new tensor m1\n"); + struct ggml_tensor * m12 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizey); + ggml_set_f32(m12, 1.5f); + + // printf("Creating new tensor m2\n"); + struct ggml_tensor * m2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, sizex, sizez); + ggml_set_f32(m2, 2.0f); + + printf("\n------ Test 1 - Matrix Mult via F32 code ------------------------------------------------------------------------------\n"); + // printf("Creating new tensor m11xm2\n"); + struct ggml_tensor * m11xm2 = ggml_mul_mat(ctx, m11, m2); + + // printf("Creating compute graph\n"); + struct ggml_cgraph gf = ggml_build_forward(m11xm2); + + gf.n_threads=benchmark_params.n_threads; + printf("cgraph->n_threads=%i\n",gf.n_threads); + + TENSOR_DUMP(m11); + TENSOR_DUMP(m2); + + ggml_graph_compute(ctx, &gf); + + TENSOR_DUMP(gf.nodes[0]); + + printf("\n------ Test 2 - Matrix Mult via Q4_0 code ------------------------------------------------------------------------------\n"); + + int32_t nelements = sizex*sizey; + int32_t ne[2] = { sizex, sizey }; + + std::vector hist_cur(1 << 4, 0); + + // Set up a the benchmark matrices + // printf("Creating new tensor q11 & Running quantize\n"); + struct ggml_tensor * q11 = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, sizex, sizey); + ggml_quantize_q4_0((const float *) m11->data, q11->data, nelements, ne[0], hist_cur.data()); + + // Set up a the compute graph + // printf("Creating new tensor q31\n"); + struct ggml_tensor * q31 = ggml_mul_mat(ctx, q11, m2); + + // printf("Creating compute graph\n"); + struct ggml_cgraph gf31 = ggml_build_forward(q31); + gf31.n_threads=benchmark_params.n_threads; + + // Set up a second graph computation to make sure we override the CPU cache lines + // printf("Creating new tensor q12 & Running quantize\n"); + struct ggml_tensor * q12 = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, sizex, sizey); + ggml_quantize_q4_0((const float *) m12->data, q12->data, nelements, ne[0], hist_cur.data()); + + // printf("Creating new tensor q32\n"); + struct ggml_tensor * q32 = ggml_mul_mat(ctx, q12, m2); + + //printf("Creating compute graph\n"); + struct ggml_cgraph gf32 = ggml_build_forward(q32); + gf32.n_threads=benchmark_params.n_threads; + printf("cgraph->n_threads=%i\n",gf31.n_threads); + + const int dimx = sizex; + const int dimy = sizey; + const int dimz = sizez; + long long int flops_per_dot_product = dimy + dimy; + long long int flops_per_matrix = flops_per_dot_product * dimx * dimz; ; + printf("Matrix Multiplication of (%i,%i,%i) x (%i,%i,%i) - about %6.2f gFLOPS\n\n", sizex, sizey, 1, sizex, sizez, 1, 1.0f*flops_per_matrix / 1000 / 1000 / 1000); + + + // Let's use the F32 result from above as a reference for the q4_0 multiplication + float sum_of_F32_reference = tensor_sum_elements(gf.nodes[0]); + + printf("Iteration;NThreads; SizeX; SizeY; SizeZ; Required_FLOPS; Elapsed_u_Seconds; gigaFLOPS\n"); + printf("=====================================================================================\n"); + + double gflops_sum = 0; + for (int i=0;i allowed_delta) { + printf("\nABORT - ERROR in Matrix Multiplication result - expected %6.2f, got %6.2f (delta %6.2f > allowed_delta %6.2f)\n", + sum_of_F32_reference, + sum_of_Q4_result, + delta, + allowed_delta + ); + exit(0); + } + + // Running a different graph computation to make sure we override the CPU cache lines + ggml_graph_compute(ctx, &gf32); + } + printf("\n"); + printf("Average%78.2f\n",gflops_sum/((double)benchmark_params.n_iterations)); + printf("=====================================================================================\n"); +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/chat-13B.bat b/ctransformers/models/submodules/ggllm.cpp/examples/chat-13B.bat new file mode 100644 index 0000000000000000000000000000000000000000..c5c8ac6efa81a552725538648592e3fc1563e1fa --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/chat-13B.bat @@ -0,0 +1,57 @@ +@setlocal disabledelayedexpansion enableextensions +@echo off + +cd /d "%~dp0.." +if not "%errorlevel%"=="0" ( + echo Unable to change directory. + pause + exit /b 1 +) + +if not defined MODEL set "MODEL=models\13B\ggml-model-q4_0.bin" +if not defined USER_NAME set "USER_NAME=User" +if not defined AI_NAME set "AI_NAME=ChatLLaMa" +rem Adjust to the number of CPU cores you want to use. +rem if not defined N_THREAD set "N_THREAD=8" +rem Number of tokens to predict (made it larger than default because we want a long interaction) +if not defined N_PREDICTS set "N_PREDICTS=2048" +if not defined GEN_OPTIONS set "GEN_OPTIONS=--ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647" + +rem Default main script paths +set "DEFAULT_MAIN_SCRIPT_PATHS=main.exe build\bin\main.exe" + +rem Get main script path from command line arguments +set "MAIN_SCRIPT_PATH=%~1" + +rem If the main script path was not specified, try the default paths +if not defined MAIN_SCRIPT_PATH ( + for %%i in (%DEFAULT_MAIN_SCRIPT_PATHS%) do ( + if exist "%%i" set "MAIN_SCRIPT_PATH=%%i" + ) +) + +rem If the main script path was not found, tell the user how to specify it +if not defined MAIN_SCRIPT_PATH ( + echo The main script could not be found. Please provide the path to the main script as 1st argument to this script, or place the main script in one of the default locations: + echo %DEFAULT_MAIN_SCRIPT_PATHS% + pause + exit /b 1 +) + +rem Default context, feel free to edit it +set "PROMPT_TEXT=Text transcript of a never ending dialog, where %USER_NAME% interacts with an AI assistant named %AI_NAME%. %AI_NAME% is helpful, kind, honest, friendly, good at writing and never fails to answer %USER_NAME%'s requests immediately and with details and precision. There are no annotations like (30 seconds passed...) or (to himself), just what %USER_NAME% and %AI_NAME% say aloud to each other. The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long. The transcript only includes text, it does not include markup like HTML and Markdown." + +rem Set a temporary variable if N_THREAD is set +if defined N_THREAD ( + set "_N_THREAD=--threads %N_THREAD%" +) else ( + set "_N_THREAD=" +) + +rem Run the script +echo "%MAIN_SCRIPT_PATH%" %GEN_OPTIONS% %_N_THREAD% ^ + --model "%MODEL%" ^ + --n_predict %N_PREDICTS% ^ + --color --interactive ^ + --reverse-prompt "%USER_NAME%:" ^ + --prompt "%PROMPT_TEXT%" diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/chat-13B.sh b/ctransformers/models/submodules/ggllm.cpp/examples/chat-13B.sh new file mode 100644 index 0000000000000000000000000000000000000000..35c089d57d253ae828322f550f08f102f1aed59d --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/chat-13B.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +set -e + +cd "$(dirname "$0")/.." || exit + +MODEL="${MODEL:-./models/13B/ggml-model-q4_0.bin}" +PROMPT_TEMPLATE=${PROMPT_TEMPLATE:-./prompts/chat.txt} +USER_NAME="${USER_NAME:-USER}" +AI_NAME="${AI_NAME:-ChatLLaMa}" + +# Adjust to the number of CPU cores you want to use. +N_THREAD="${N_THREAD:-8}" +# Number of tokens to predict (made it larger than default because we want a long interaction) +N_PREDICTS="${N_PREDICTS:-2048}" + +# Note: you can also override the generation options by specifying them on the command line: +# For example, override the context size by doing: ./chatLLaMa --ctx_size 1024 +GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647}" + +DATE_TIME=$(date +%H:%M) +DATE_YEAR=$(date +%Y) + +PROMPT_FILE=$(mktemp -t llamacpp_prompt.XXXXXXX.txt) + +sed -e "s/\[\[USER_NAME\]\]/$USER_NAME/g" \ + -e "s/\[\[AI_NAME\]\]/$AI_NAME/g" \ + -e "s/\[\[DATE_TIME\]\]/$DATE_TIME/g" \ + -e "s/\[\[DATE_YEAR\]\]/$DATE_YEAR/g" \ + $PROMPT_TEMPLATE > $PROMPT_FILE + +# shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS +./main $GEN_OPTIONS \ + --model "$MODEL" \ + --threads "$N_THREAD" \ + --n_predict "$N_PREDICTS" \ + --color --interactive \ + --file ${PROMPT_FILE} \ + --reverse-prompt "${USER_NAME}:" \ + --in-prefix ' ' \ + "$@" diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/chat-persistent.sh b/ctransformers/models/submodules/ggllm.cpp/examples/chat-persistent.sh new file mode 100644 index 0000000000000000000000000000000000000000..e0c251e5b03cc7819320368c1a6ba6879665a941 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/chat-persistent.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +set -euo pipefail + +cd "$(dirname "$0")/.." || exit + +if [[ -z "${PROMPT_CACHE_FILE+x}" || -z "${CHAT_SAVE_DIR+x}" ]]; then + echo >&2 "error: PROMPT_CACHE_FILE and CHAT_SAVE_DIR must be provided" + exit 1 +fi + +MODEL="${MODEL:-./models/13B/ggml-model-q4_0.bin}" +PROMPT_TEMPLATE="${PROMPT_TEMPLATE:-./prompts/chat.txt}" +USER_NAME="${USER_NAME:-User}" +AI_NAME="${AI_NAME:-ChatLLaMa}" +DATE_TIME="$(date +%H:%M)" +DATE_YEAR="$(date +%Y)" + +LOG="${CHAT_SAVE_DIR}/main.log" +LOG_BG="${CHAT_SAVE_DIR}/main-bg.log" +CUR_PROMPT_FILE="${CHAT_SAVE_DIR}/current-prompt.txt" +CUR_PROMPT_CACHE="${CHAT_SAVE_DIR}/current-cache.bin" +NEXT_PROMPT_FILE="${CHAT_SAVE_DIR}/next-prompt.txt" +NEXT_PROMPT_CACHE="${CHAT_SAVE_DIR}/next-cache.bin" + +SESSION_SIZE_MSG_PATTERN='main: session file matches [[:digit:]]+ / [[:digit:]]+' +SAMPLE_TIME_MSG_PATTERN='sample time =[[:space:]]+[[:digit:]]+.[[:digit:]]+ ms /[[:space:]]+[[:digit:]]+' +SED_DELETE_MESSAGES="/^(${USER_NAME}:|${AI_NAME}:|\\.\\.\\.)/,\$d" + +CTX_SIZE=2048 +CTX_ROTATE_POINT=$((CTX_SIZE * 3 / 5)) # REVIEW +OPTS=(--model "$MODEL" --ctx_size "$CTX_SIZE" --repeat_last_n 256 "$@") + +# An unbuffered `tail -c+N` +skip_bytes() { + LANG=C IFS= read -r -n "$1" -d '' c + while LANG=C IFS= read -r -n 1 -d '' c; do + printf '%s' "$c" + done +} + +mkdir -p "$CHAT_SAVE_DIR" +echo >"$LOG" +trap "tail -n100 ${LOG}" EXIT + +if [[ ! -e "$CUR_PROMPT_FILE" ]]; then + sed -e "s/\[\[USER_NAME\]\]/${USER_NAME}/g" \ + -e "s/\[\[AI_NAME\]\]/${AI_NAME}/g" \ + -e "s/\[\[DATE_TIME\]\]/${DATE_TIME}/g" \ + -e "s/\[\[DATE_YEAR\]\]/${DATE_YEAR}/g" \ + "$PROMPT_TEMPLATE" >"$CUR_PROMPT_FILE" +fi + +if [[ ! -e "$NEXT_PROMPT_FILE" ]]; then + sed -r "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE" +fi + +if [[ "$(tail -c4 "$NEXT_PROMPT_FILE")" != "..." ]]; then + echo '...' >>"$NEXT_PROMPT_FILE" +fi + +if [[ ! -e "$PROMPT_CACHE_FILE" ]]; then + echo 'Prompt cache does not exist, building...' + # Default batch_size to 8 here for better user feedback during initial prompt processing + ./main 2>>"$LOG" \ + --batch_size 8 \ + "${OPTS[@]}" \ + --prompt-cache "$PROMPT_CACHE_FILE" \ + --file "$CUR_PROMPT_FILE" \ + --n_predict 1 + echo + echo 'Done!' +fi + +if [[ ! -e "$CUR_PROMPT_CACHE" ]]; then + cp "$PROMPT_CACHE_FILE" "$CUR_PROMPT_CACHE" +fi +if [[ ! -e "$NEXT_PROMPT_CACHE" ]]; then + cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE" +fi + +printf '%s ' "$(< "$CUR_PROMPT_FILE")" +n_tokens=0 + +while read -e line; do + # Limit generation to remaining context, with a buffer and estimating 2 chars/token for input + n_predict=$((CTX_SIZE - n_tokens - ${#line} / 2 - 32)) + + # Swap prompts when we're about to run out of context + if ((n_predict <= 0)); then + wait # for background main (below) to finish with next prompt + mv "$NEXT_PROMPT_FILE" "$CUR_PROMPT_FILE" + mv "$NEXT_PROMPT_CACHE" "$CUR_PROMPT_CACHE" + + sed -r "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE" + echo '...' >>"$NEXT_PROMPT_FILE" + cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE" + + n_tokens=0 + n_predict=$((CTX_SIZE / 2)) + fi + + echo " ${line}" >>"$CUR_PROMPT_FILE" + if ((n_tokens > CTX_ROTATE_POINT)); then + echo " ${line}" >>"$NEXT_PROMPT_FILE" + fi + + n_prompt_len_pre=$(($(wc -c <"$CUR_PROMPT_FILE"))) + + printf '%s: ' "$AI_NAME" >>"$CUR_PROMPT_FILE" + + ./main 2>>"$LOG" "${OPTS[@]}" \ + --prompt-cache "$CUR_PROMPT_CACHE" \ + --prompt-cache-all \ + --file "$CUR_PROMPT_FILE" \ + --reverse-prompt "${USER_NAME}:" \ + --n_predict "$n_predict" | + skip_bytes 1 | # skip BOS token added by ./main + tee "$CUR_PROMPT_FILE.tmp" | # save prompt + generation to tmp file + skip_bytes "$n_prompt_len_pre" # print generation + + mv "$CUR_PROMPT_FILE.tmp" "$CUR_PROMPT_FILE" + + # if we hit n_predict instead of reverse-prompt, we need to add the prompt + if [[ "$(tail -n1 "$CUR_PROMPT_FILE")" != "${USER_NAME}:" ]]; then + printf '\n%s:' "$USER_NAME" + printf '\n%s:' "$USER_NAME" >> "$CUR_PROMPT_FILE" + fi + + printf ' ' + + # HACK get num tokens from debug message + # TODO get both messages in one go + if ! session_size_msg="$(tail -n30 "$LOG" | grep -oE "$SESSION_SIZE_MSG_PATTERN")" || + ! sample_time_msg="$( tail -n10 "$LOG" | grep -oE "$SAMPLE_TIME_MSG_PATTERN")"; then + echo >&2 "Couldn't get number of tokens from ./main output!" + exit 1 + fi + + n_tokens=$(($(cut -d/ -f2 <<<"$session_size_msg") + $(cut -d/ -f2 <<<"$sample_time_msg"))) + + if ((n_tokens > CTX_ROTATE_POINT)); then + tail -c+$((n_prompt_len_pre + 1)) "$CUR_PROMPT_FILE" >>"$NEXT_PROMPT_FILE" + fi + + # Update cache for next prompt in background, ideally during user input + ./main >>"$LOG_BG" 2>&1 "${OPTS[@]}" \ + --prompt-cache "$NEXT_PROMPT_CACHE" \ + --file "$NEXT_PROMPT_FILE" \ + --n_predict 1 & +done diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/chat-vicuna.sh b/ctransformers/models/submodules/ggllm.cpp/examples/chat-vicuna.sh new file mode 100644 index 0000000000000000000000000000000000000000..8c7b7bef42784d3037c377e71fc20e08a7302883 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/chat-vicuna.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +set -e + +cd "$(dirname "$0")/.." || exit + +MODEL="${MODEL:-./models/ggml-vic13b-uncensored-q5_0.bin}" +PROMPT_TEMPLATE=${PROMPT_TEMPLATE:-./prompts/chat.txt} +USER_NAME="### Human" +AI_NAME="### Assistant" + +# Adjust to the number of CPU cores you want to use. +N_THREAD="${N_THREAD:-8}" +# Number of tokens to predict (made it larger than default because we want a long interaction) +N_PREDICTS="${N_PREDICTS:-2048}" + +# Note: you can also override the generation options by specifying them on the command line: +# For example, override the context size by doing: ./chatLLaMa --ctx_size 1024 +GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647}" + +DATE_TIME=$(date +%H:%M) +DATE_YEAR=$(date +%Y) + +PROMPT_FILE=$(mktemp -t llamacpp_prompt.XXXXXXX.txt) + +sed -e "s/\[\[USER_NAME\]\]/$USER_NAME/g" \ + -e "s/\[\[AI_NAME\]\]/$AI_NAME/g" \ + -e "s/\[\[DATE_TIME\]\]/$DATE_TIME/g" \ + -e "s/\[\[DATE_YEAR\]\]/$DATE_YEAR/g" \ + $PROMPT_TEMPLATE > $PROMPT_FILE + +# shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS +./bin/main $GEN_OPTIONS \ + --model "$MODEL" \ + --threads "$N_THREAD" \ + --n_predict "$N_PREDICTS" \ + --color --interactive \ + --file ${PROMPT_FILE} \ + --reverse-prompt "### Human:" \ + --in-prefix ' ' \ + "$@" diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/chat.sh b/ctransformers/models/submodules/ggllm.cpp/examples/chat.sh new file mode 100644 index 0000000000000000000000000000000000000000..9a928ef05431a847efa476433b19d24ca88c15ca --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/chat.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# +# Temporary script - will be removed in the future +# + +cd `dirname $0` +cd .. + +# Important: +# +# "--keep 48" is based on the contents of prompts/chat-with-bob.txt +# +./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n 256 --keep 48 \ + --repeat_penalty 1.0 --color -i \ + -r "User:" -f prompts/chat-with-bob.txt diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/common.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/common.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fed24e027d8a814146defdb07f02837e172c3664 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/common.cpp @@ -0,0 +1,947 @@ +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__APPLE__) && defined(__MACH__) +#include +#include +#endif + +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#include +#include +#else +#include +#include +#include +#endif + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +int32_t get_num_physical_cores() { +#ifdef __linux__ + // enumerate the set of thread siblings, num entries is num cores + std::unordered_set siblings; + for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) { + std::ifstream thread_siblings("/sys/devices/system/cpu" + + std::to_string(cpu) + "/topology/thread_siblings"); + if (!thread_siblings.is_open()) { + break; // no more cpus + } + std::string line; + if (std::getline(thread_siblings, line)) { + siblings.insert(line); + } + } + if (siblings.size() > 0) { + return static_cast(siblings.size()); + } +#elif defined(__APPLE__) && defined(__MACH__) + int32_t num_physical_cores; + size_t len = sizeof(num_physical_cores); + int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return num_physical_cores; + } + result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return num_physical_cores; + } +#elif defined(_WIN32) + //TODO: Implement +#endif + unsigned int n_threads = std::thread::hardware_concurrency(); + return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4; +} + +void process_escapes(std::string& input) { + std::size_t input_len = input.length(); + std::size_t output_idx = 0; + + for (std::size_t input_idx = 0; input_idx < input_len; ++input_idx) { + if (input[input_idx] == '\\' && input_idx + 1 < input_len) { + switch (input[++input_idx]) { + case 'n': input[output_idx++] = '\n'; break; + case 'r': input[output_idx++] = '\r'; break; + case 't': input[output_idx++] = '\t'; break; + case '\'': input[output_idx++] = '\''; break; + case '\"': input[output_idx++] = '\"'; break; + case '\\': input[output_idx++] = '\\'; break; + default: input[output_idx++] = '\\'; + input[output_idx++] = input[input_idx]; break; + } + } else { + input[output_idx++] = input[input_idx]; + } + } + + input.resize(output_idx); +} + +bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { + bool invalid_param = false; + bool escape_prompt = false; + std::string arg; + gpt_params default_params; + const std::string arg_prefix = "--"; + + for (int i = 1; i < argc; i++) { + arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); + } + + if (arg == "-s" || arg == "--seed") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.seed = std::stoi(argv[i]); + } else if (arg == "-t" || arg == "--threads") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_threads = std::stoi(argv[i]); + } else if (arg == "-p" || arg == "--prompt") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.prompt = argv[i]; + } else if (arg == "-e") { + escape_prompt = true; + } else if (arg == "--prompt-cache") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.path_prompt_cache = argv[i]; + } else if (arg == "--prompt-cache-all") { + params.prompt_cache_all = true; + } else if (arg == "--prompt-cache-ro") { + params.prompt_cache_ro = true; + } else if (arg == "-f" || arg == "--file") { + if (++i >= argc) { + invalid_param = true; + break; + } + std::ifstream file(argv[i]); + if (!file) { + fprintf(stderr, "error: failed to open file '%s'\n", argv[i]); + invalid_param = true; + break; + } + std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); + if (params.prompt.back() == '\n') { + params.prompt.pop_back(); + } + } else if (arg == "-n" || arg == "--n-predict") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_predict = std::stoi(argv[i]); + } else if (arg == "--top-k") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.top_k = std::stoi(argv[i]); + } else if (arg == "-c" || arg == "--ctx-size") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_ctx = std::stoi(argv[i]); + } else if (arg == "--memory-f32") { + params.memory_f16 = false; + } else if (arg == "--top-p") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.top_p = std::stof(argv[i]); + } else if (arg == "--temp") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.temp = std::stof(argv[i]); + } else if (arg == "--tfs") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.tfs_z = std::stof(argv[i]); + } else if (arg == "--typical") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.typical_p = std::stof(argv[i]); + } else if (arg == "--repeat-last-n") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.repeat_last_n = std::stoi(argv[i]); + } else if (arg == "--repeat-penalty") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.repeat_penalty = std::stof(argv[i]); + } else if (arg == "--frequency-penalty") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.frequency_penalty = std::stof(argv[i]); + } else if (arg == "--presence-penalty") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.presence_penalty = std::stof(argv[i]); + } else if (arg == "--mirostat") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.mirostat = std::stoi(argv[i]); + } else if (arg == "--mirostat-lr") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.mirostat_eta = std::stof(argv[i]); + } else if (arg == "--mirostat-ent") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.mirostat_tau = std::stof(argv[i]); + } else if (arg == "-b" || arg == "--batch-size") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_batch = std::stoi(argv[i]); + params.n_batch = std::min(512, params.n_batch); + } else if (arg == "--keep") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_keep = std::stoi(argv[i]); + } else if (arg == "-m" || arg == "--model") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.model = argv[i]; + } else if (arg == "-a" || arg == "--alias") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.model_alias = argv[i]; + } else if (arg == "--lora") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.lora_adapter = argv[i]; + params.use_mmap = false; + } else if (arg == "--lora-base") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.lora_base = argv[i]; + } else if (arg == "-i" || arg == "--interactive") { + params.interactive = true; + } else if (arg == "--embedding") { + params.embedding = true; + } else if (arg == "--interactive-first") { + params.interactive_first = true; + } else if (arg == "-ins" || arg == "--instruct") { + params.instruct = true; + } else if (arg == "--multiline-input") { + params.multiline_input = true; + } else if (arg == "--color") { + params.use_color = true; + } else if (arg == "--mlock") { + params.use_mlock = true; + } else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") { + if (++i >= argc) { + invalid_param = true; + break; + } +#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD + params.n_gpu_layers = std::stoi(argv[i]); +#else + fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); +#endif + } else if (arg == "--main-gpu" || arg == "-mg") { + if (++i >= argc) { + invalid_param = true; + break; + } +#ifdef GGML_USE_CUBLAS + params.main_gpu = std::stoi(argv[i]); +#else + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.\n"); +#endif + } else if (arg == "--tensor-split" || arg == "-ts") { + if (++i >= argc) { + invalid_param = true; + break; + } +#ifdef GGML_USE_CUBLAS + std::string arg_next = argv[i]; + + // split string by , and / + const std::regex regex{R"([,/]+)"}; + std::sregex_token_iterator it{arg_next.begin(), arg_next.end(), regex, -1}; + std::vector split_arg{it, {}}; + GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES); + + for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) { + if (i < split_arg.size()) { + params.tensor_split[i] = std::stof(split_arg[i]); + } else { + params.tensor_split[i] = 0.0f; + } + } +#else + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n"); +#endif // GGML_USE_CUBLAS + } else if (arg == "--low-vram" || arg == "-lv") { +#ifdef GGML_USE_CUBLAS + params.low_vram = true; +#else + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n"); +#endif // GGML_USE_CUBLAS + } else if (arg == "--no-mmap") { + params.use_mmap = false; + } else if (arg == "--mtest") { + params.mem_test = true; + } else if (arg == "--export") { + params.export_cgraph = true; + } else if (arg == "--verbose-prompt") { + params.verbose_prompt = true; + } else if (arg == "-r" || arg == "--reverse-prompt") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.antiprompt.push_back(argv[i]); + } else if (arg == "--perplexity") { + params.perplexity = true; + } else if (arg == "--ignore-eos") { + params.logit_bias[llama_token_eos()] = -INFINITY; + } else if (arg == "--no-penalize-nl") { + params.penalize_nl = false; + } else if (arg == "-l" || arg == "--logit-bias") { + if (++i >= argc) { + invalid_param = true; + break; + } + std::stringstream ss(argv[i]); + llama_token key; + char sign; + std::string value_str; + try { + if (ss >> key && ss >> sign && std::getline(ss, value_str) && (sign == '+' || sign == '-')) { + params.logit_bias[key] = std::stof(value_str) * ((sign == '-') ? -1.0f : 1.0f); + } else { + throw std::exception(); + } + } catch (const std::exception&) { + invalid_param = true; + break; + } + } else if (arg == "-h" || arg == "--help") { + gpt_print_usage(argc, argv, default_params); + exit(0); + } else if (arg == "--random-prompt") { + params.random_prompt = true; + } else if (arg == "--in-prefix") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.input_prefix = argv[i]; + } else if (arg == "--in-suffix") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.input_suffix = argv[i]; + } else { + fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); + gpt_print_usage(argc, argv, default_params); + exit(1); + } + } + if (invalid_param) { + fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str()); + gpt_print_usage(argc, argv, default_params); + exit(1); + } + if (params.prompt_cache_all && + (params.interactive || params.interactive_first || + params.instruct)) { + fprintf(stderr, "error: --prompt-cache-all not supported in interactive mode yet\n"); + gpt_print_usage(argc, argv, default_params); + exit(1); + } + +#ifdef GGML_USE_CUBLAS + if (!params.lora_adapter.empty() && params.n_gpu_layers > 0) { + fprintf(stderr, "%s: error: the simultaneous use of LoRAs and GPU acceleration is not supported", __func__); + exit(1); + } +#endif // GGML_USE_CUBLAS + + if (escape_prompt) { + process_escapes(params.prompt); + } + + return true; +} + +void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { + fprintf(stderr, "usage: %s [options]\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "options:\n"); + fprintf(stderr, " -h, --help show this help message and exit\n"); + fprintf(stderr, " -i, --interactive run in interactive mode\n"); + fprintf(stderr, " --interactive-first run in interactive mode and wait for input right away\n"); + fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n"); + fprintf(stderr, " --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n"); + fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n"); + fprintf(stderr, " halt generation at PROMPT, return control in interactive mode\n"); + fprintf(stderr, " (can be specified more than once for multiple prompts).\n"); + fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n"); + fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n"); + fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); + fprintf(stderr, " -p PROMPT, --prompt PROMPT\n"); + fprintf(stderr, " prompt to start generation with (default: empty)\n"); + fprintf(stderr, " -e process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n"); + fprintf(stderr, " --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n"); + fprintf(stderr, " --prompt-cache-all if specified, saves user input and generations to cache as well.\n"); + fprintf(stderr, " not supported with --interactive or other interactive options\n"); + fprintf(stderr, " --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n"); + fprintf(stderr, " --random-prompt start with a randomized prompt.\n"); + fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n"); + fprintf(stderr, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n"); + fprintf(stderr, " -f FNAME, --file FNAME\n"); + fprintf(stderr, " prompt file to start generation.\n"); + fprintf(stderr, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict); + fprintf(stderr, " --top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); + fprintf(stderr, " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); + fprintf(stderr, " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); + fprintf(stderr, " --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); + fprintf(stderr, " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); + fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); + fprintf(stderr, " --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); + fprintf(stderr, " --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); + fprintf(stderr, " --mirostat N use Mirostat sampling.\n"); + fprintf(stderr, " Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"); + fprintf(stderr, " (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); + fprintf(stderr, " --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); + fprintf(stderr, " --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); + fprintf(stderr, " -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n"); + fprintf(stderr, " modifies the likelihood of token appearing in the completion,\n"); + fprintf(stderr, " i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"); + fprintf(stderr, " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n"); + fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx); + fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); + fprintf(stderr, " --no-penalize-nl do not penalize newline token\n"); + fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n"); + fprintf(stderr, " not recommended: doubles context memory required and no measurable increase in quality\n"); + fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp); + fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); + fprintf(stderr, " --perplexity compute perplexity over the prompt\n"); + fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); + if (llama_mlock_supported()) { + fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n"); + } + if (llama_mmap_supported()) { + fprintf(stderr, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); + } +#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD + fprintf(stderr, " -ngl N, --n-gpu-layers N\n"); + fprintf(stderr, " number of layers to store in VRAM\n"); + fprintf(stderr, " -ts SPLIT --tensor-split SPLIT\n"); + fprintf(stderr, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); + fprintf(stderr, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" ); + fprintf(stderr, " -lv, --low-vram don't allocate VRAM scratch buffer\n" ); +#endif + fprintf(stderr, " --mtest compute maximum memory usage\n"); + fprintf(stderr, " --export export the computation graph to 'llama.ggml'\n"); + fprintf(stderr, " --verbose-prompt print prompt before generation\n"); + fprintf(stderr, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n"); + fprintf(stderr, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n"); + fprintf(stderr, " -m FNAME, --model FNAME\n"); + fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); + fprintf(stderr, "\n"); +} + +std::string gpt_random_prompt(std::mt19937 & rng) { + const int r = rng() % 10; + switch (r) { + case 0: return "So"; + case 1: return "Once upon a time"; + case 2: return "When"; + case 3: return "The"; + case 4: return "After"; + case 5: return "If"; + case 6: return "import"; + case 7: return "He"; + case 8: return "She"; + case 9: return "They"; + default: return "To"; + } + + return "The"; +} + +// TODO: not great allocating this every time +std::vector llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) { + // initialize to prompt numer of chars, since n_tokens <= n_prompt_chars + std::vector res(text.size() + (int) add_bos); + const int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); + assert(n >= 0); + res.resize(n); + + return res; +} + +struct llama_context * llama_init_from_gpt_params(const gpt_params & params) { + auto lparams = llama_context_default_params(); + + lparams.n_ctx = params.n_ctx; + lparams.n_batch = params.n_batch; + lparams.n_gpu_layers = params.n_gpu_layers; + lparams.main_gpu = params.main_gpu; + memcpy(lparams.tensor_split, params.tensor_split, LLAMA_MAX_DEVICES*sizeof(float)); + lparams.low_vram = params.low_vram; + lparams.seed = params.seed; + lparams.f16_kv = params.memory_f16; + lparams.use_mmap = params.use_mmap; + lparams.use_mlock = params.use_mlock; + lparams.logits_all = params.perplexity; + lparams.embedding = params.embedding; + + llama_context * lctx = llama_init_from_file(params.model.c_str(), lparams); + + if (lctx == NULL) { + fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str()); + return NULL; + } + + if (!params.lora_adapter.empty()) { + int err = llama_apply_lora_from_file(lctx, + params.lora_adapter.c_str(), + params.lora_base.empty() ? NULL : params.lora_base.c_str(), + params.n_threads); + if (err != 0) { + fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__); + return NULL; + } + } + + return lctx; +} + +void console_init(console_state & con_st) { +#if defined(_WIN32) + // Windows-specific console initialization + DWORD dwMode = 0; + con_st.hConsole = GetStdHandle(STD_OUTPUT_HANDLE); + if (con_st.hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(con_st.hConsole, &dwMode)) { + con_st.hConsole = GetStdHandle(STD_ERROR_HANDLE); + if (con_st.hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(con_st.hConsole, &dwMode))) { + con_st.hConsole = NULL; + } + } + if (con_st.hConsole) { + // Enable ANSI colors on Windows 10+ + if (con_st.use_color && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) { + SetConsoleMode(con_st.hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING); + } + // Set console output codepage to UTF8 + SetConsoleOutputCP(CP_UTF8); + } + HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE); + if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) { + // Set console input codepage to UTF16 + _setmode(_fileno(stdin), _O_WTEXT); + + // Turn off ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT) + dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT); + SetConsoleMode(hConIn, dwMode); + } +#else + // POSIX-specific console initialization + struct termios new_termios; + tcgetattr(STDIN_FILENO, &con_st.prev_state); + new_termios = con_st.prev_state; + new_termios.c_lflag &= ~(ICANON | ECHO); + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + tcsetattr(STDIN_FILENO, TCSANOW, &new_termios); + + con_st.tty = fopen("/dev/tty", "w+"); + if (con_st.tty != nullptr) { + con_st.out = con_st.tty; + } + + setlocale(LC_ALL, ""); +#endif +} + +void console_cleanup(console_state & con_st) { + // Reset console color + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + +#if !defined(_WIN32) + if (con_st.tty != nullptr) { + con_st.out = stdout; + fclose(con_st.tty); + con_st.tty = nullptr; + } + // Restore the terminal settings on POSIX systems + tcsetattr(STDIN_FILENO, TCSANOW, &con_st.prev_state); +#endif +} + +/* Keep track of current color of output, and emit ANSI code if it changes. */ +void console_set_color(console_state & con_st, console_color_t color) { + if (con_st.use_color && con_st.color != color) { + fflush(stdout); + switch(color) { + case CONSOLE_COLOR_DEFAULT: + fprintf(con_st.out, ANSI_COLOR_RESET); + break; + case CONSOLE_COLOR_PROMPT: + fprintf(con_st.out, ANSI_COLOR_YELLOW); + break; + case CONSOLE_COLOR_USER_INPUT: + fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN); + break; + case CONSOLE_COLOR_ERROR: + fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED); + break; + } + con_st.color = color; + fflush(con_st.out); + } +} + +char32_t getchar32() { +#if defined(_WIN32) + HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE); + wchar_t high_surrogate = 0; + + while (true) { + INPUT_RECORD record; + DWORD count; + if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) { + return WEOF; + } + + if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) { + wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar; + if (wc == 0) { + continue; + } + + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + high_surrogate = wc; + continue; + } else if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate + if (high_surrogate != 0) { // Check if we have a high surrogate + return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000; + } + } + + high_surrogate = 0; // Reset the high surrogate + return static_cast(wc); + } + } +#else + wchar_t wc = getwchar(); + if (static_cast(wc) == WEOF) { + return WEOF; + } + +#if WCHAR_MAX == 0xFFFF + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + wchar_t low_surrogate = getwchar(); + if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate + return (static_cast(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000; + } + } + if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair + return 0xFFFD; // Return the replacement character U+FFFD + } +#endif + + return static_cast(wc); +#endif +} + +void pop_cursor(console_state & con_st) { +#if defined(_WIN32) + if (con_st.hConsole != NULL) { + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo); + + COORD newCursorPosition = bufferInfo.dwCursorPosition; + if (newCursorPosition.X == 0) { + newCursorPosition.X = bufferInfo.dwSize.X - 1; + newCursorPosition.Y -= 1; + } else { + newCursorPosition.X -= 1; + } + + SetConsoleCursorPosition(con_st.hConsole, newCursorPosition); + return; + } +#endif + putc('\b', con_st.out); +} + +int estimateWidth(char32_t codepoint) { +#if defined(_WIN32) + return 1; +#else + return wcwidth(codepoint); +#endif +} + +int put_codepoint(console_state & con_st, const char* utf8_codepoint, size_t length, int expectedWidth) { +#if defined(_WIN32) + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + if (!GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo)) { + // go with the default + return expectedWidth; + } + COORD initialPosition = bufferInfo.dwCursorPosition; + DWORD nNumberOfChars = length; + WriteConsole(con_st.hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL); + + CONSOLE_SCREEN_BUFFER_INFO newBufferInfo; + GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo); + + // Figure out our real position if we're in the last column + if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) { + DWORD nNumberOfChars; + WriteConsole(con_st.hConsole, &" \b", 2, &nNumberOfChars, NULL); + GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo); + } + + int width = newBufferInfo.dwCursorPosition.X - initialPosition.X; + if (width < 0) { + width += newBufferInfo.dwSize.X; + } + return width; +#else + // we can trust expectedWidth if we've got one + if (expectedWidth >= 0 || con_st.tty == nullptr) { + fwrite(utf8_codepoint, length, 1, con_st.out); + return expectedWidth; + } + + fputs("\033[6n", con_st.tty); // Query cursor position + int x1, x2, y1, y2; + int results = 0; + results = fscanf(con_st.tty, "\033[%d;%dR", &y1, &x1); + + fwrite(utf8_codepoint, length, 1, con_st.tty); + + fputs("\033[6n", con_st.tty); // Query cursor position + results += fscanf(con_st.tty, "\033[%d;%dR", &y2, &x2); + + if (results != 4) { + return expectedWidth; + } + + int width = x2 - x1; + if (width < 0) { + // Calculate the width considering text wrapping + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + width += w.ws_col; + } + return width; +#endif +} + +void replace_last(console_state & con_st, char ch) { +#if defined(_WIN32) + pop_cursor(con_st); + put_codepoint(con_st, &ch, 1, 1); +#else + fprintf(con_st.out, "\b%c", ch); +#endif +} + +void append_utf8(char32_t ch, std::string & out) { + if (ch <= 0x7F) { + out.push_back(static_cast(ch)); + } else if (ch <= 0x7FF) { + out.push_back(static_cast(0xC0 | ((ch >> 6) & 0x1F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0xFFFF) { + out.push_back(static_cast(0xE0 | ((ch >> 12) & 0x0F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0x10FFFF) { + out.push_back(static_cast(0xF0 | ((ch >> 18) & 0x07))); + out.push_back(static_cast(0x80 | ((ch >> 12) & 0x3F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else { + // Invalid Unicode code point + } +} + +// Helper function to remove the last UTF-8 character from a string +void pop_back_utf8_char(std::string & line) { + if (line.empty()) { + return; + } + + size_t pos = line.length() - 1; + + // Find the start of the last UTF-8 character (checking up to 4 bytes back) + for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) { + if ((line[pos] & 0xC0) != 0x80) break; // Found the start of the character + } + line.erase(pos); +} + +bool console_readline(console_state & con_st, std::string & line) { + console_set_color(con_st, CONSOLE_COLOR_USER_INPUT); + if (con_st.out != stdout) { + fflush(stdout); + } + + line.clear(); + std::vector widths; + bool is_special_char = false; + bool end_of_stream = false; + + char32_t input_char; + while (true) { + fflush(con_st.out); // Ensure all output is displayed before waiting for input + input_char = getchar32(); + + if (input_char == '\r' || input_char == '\n') { + break; + } + + if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) { + end_of_stream = true; + break; + } + + if (is_special_char) { + console_set_color(con_st, CONSOLE_COLOR_USER_INPUT); + replace_last(con_st, line.back()); + is_special_char = false; + } + + if (input_char == '\033') { // Escape sequence + char32_t code = getchar32(); + if (code == '[' || code == 0x1B) { + // Discard the rest of the escape sequence + while ((code = getchar32()) != (char32_t) WEOF) { + if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') { + break; + } + } + } + } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace + if (!widths.empty()) { + int count; + do { + count = widths.back(); + widths.pop_back(); + // Move cursor back, print space, and move cursor back again + for (int i = 0; i < count; i++) { + replace_last(con_st, ' '); + pop_cursor(con_st); + } + pop_back_utf8_char(line); + } while (count == 0 && !widths.empty()); + } + } else { + int offset = line.length(); + append_utf8(input_char, line); + int width = put_codepoint(con_st, line.c_str() + offset, line.length() - offset, estimateWidth(input_char)); + if (width < 0) { + width = 0; + } + widths.push_back(width); + } + + if (!line.empty() && (line.back() == '\\' || line.back() == '/')) { + console_set_color(con_st, CONSOLE_COLOR_PROMPT); + replace_last(con_st, line.back()); + is_special_char = true; + } + } + + bool has_more = con_st.multiline_input; + if (is_special_char) { + replace_last(con_st, ' '); + pop_cursor(con_st); + + char last = line.back(); + line.pop_back(); + if (last == '\\') { + line += '\n'; + fputc('\n', con_st.out); + has_more = !has_more; + } else { + // llama will just eat the single space, it won't act as a space + if (line.length() == 1 && line.back() == ' ') { + line.clear(); + pop_cursor(con_st); + } + has_more = false; + } + } else { + if (end_of_stream) { + has_more = false; + } else { + line += '\n'; + fputc('\n', con_st.out); + } + } + + fflush(con_st.out); + return has_more; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/common.h b/ctransformers/models/submodules/ggllm.cpp/examples/common.h new file mode 100644 index 0000000000000000000000000000000000000000..6c2953cb2a7c672c5eb00eea003c1ba0360bfaaf --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/common.h @@ -0,0 +1,137 @@ +// Various helper functions and utilities + +#pragma once + +#include "llama.h" + +#include +#include +#include +#include +#include + +#if !defined (_WIN32) +#include +#include +#endif + +// +// CLI argument parsing +// +int32_t get_num_physical_cores(); + +struct gpt_params { + int32_t seed = -1; // RNG seed + int32_t n_threads = get_num_physical_cores(); + int32_t n_predict = -1; // new tokens to predict + int32_t n_ctx = 512; // context size + int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_gpu_layers = 0; // number of layers to store in VRAM + int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors + float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs + bool low_vram = 0; // if true, reduce VRAM usage at the cost of performance + + // sampling parameters + std::unordered_map logit_bias; // logit bias for specific tokens + int32_t top_k = 40; // <= 0 to use vocab size + float top_p = 0.95f; // 1.0 = disabled + float tfs_z = 1.00f; // 1.0 = disabled + float typical_p = 1.00f; // 1.0 = disabled + float temp = 0.80f; // 1.0 = disabled + float repeat_penalty = 1.10f; // 1.0 = disabled + int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + float frequency_penalty = 0.00f; // 0.0 = disabled + float presence_penalty = 0.00f; // 0.0 = disabled + int mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float mirostat_tau = 5.00f; // target entropy + float mirostat_eta = 0.10f; // learning rate + + std::string model = "models/7B/ggml-model.bin"; // model path + std::string model_alias = "unknown"; // model alias + std::string prompt = ""; + std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state + std::string input_prefix = ""; // string to prefix user inputs with + std::string input_suffix = ""; // string to suffix user inputs with + std::vector antiprompt; // string upon seeing which more user input is prompted + + std::string lora_adapter = ""; // lora adapter path + std::string lora_base = ""; // base model path for the lora adapter + + bool memory_f16 = true; // use f16 instead of f32 for memory kv + bool random_prompt = false; // do not randomize prompt if none provided + bool use_color = false; // use color to distinguish generations and inputs + bool interactive = false; // interactive mode + bool prompt_cache_all = false; // save user input and generations to prompt cache + bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it + + bool embedding = false; // get only sentence embedding + bool interactive_first = false; // wait for user input immediately + bool multiline_input = false; // reverse the usage of `\` + + bool instruct = false; // instruction mode (used for Alpaca models) + bool penalize_nl = true; // consider newlines as a repeatable token + bool perplexity = false; // compute perplexity over the prompt + bool use_mmap = true; // use mmap for faster loads + bool use_mlock = false; // use mlock to keep model in memory + bool mem_test = false; // compute maximum memory usage + bool export_cgraph = false; // export the computation graph + bool verbose_prompt = false; // print prompt tokens before generation +}; + +bool gpt_params_parse(int argc, char ** argv, gpt_params & params); + +void gpt_print_usage(int argc, char ** argv, const gpt_params & params); + +std::string gpt_random_prompt(std::mt19937 & rng); + +// +// Vocab utils +// + +std::vector llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos); + +// +// Model utils +// + +struct llama_context * llama_init_from_gpt_params(const gpt_params & params); + +// +// Console utils +// + +#define ANSI_COLOR_RED "\x1b[31m" +#define ANSI_COLOR_GREEN "\x1b[32m" +#define ANSI_COLOR_YELLOW "\x1b[33m" +#define ANSI_COLOR_BLUE "\x1b[34m" +#define ANSI_COLOR_MAGENTA "\x1b[35m" +#define ANSI_COLOR_CYAN "\x1b[36m" +#define ANSI_COLOR_RESET "\x1b[0m" +#define ANSI_BOLD "\x1b[1m" + +enum console_color_t { + CONSOLE_COLOR_DEFAULT=0, + CONSOLE_COLOR_PROMPT, + CONSOLE_COLOR_USER_INPUT, + CONSOLE_COLOR_ERROR +}; + +struct console_state { + bool multiline_input = false; + bool use_color = false; + console_color_t color = CONSOLE_COLOR_DEFAULT; + + FILE* out = stdout; +#if defined (_WIN32) + void* hConsole; +#else + FILE* tty = nullptr; + termios prev_state; +#endif +}; + +void console_init(console_state & con_st); +void console_cleanup(console_state & con_st); +void console_set_color(console_state & con_st, console_color_t color); +bool console_readline(console_state & con_st, std::string & line); diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/embedding/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/embedding/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..db73b6b44f07f479161ed292385fb021df2ae6f2 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/embedding/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TARGET embedding) +add_executable(${TARGET} embedding.cpp) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/embedding/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/embedding/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fe8f5dcc62ed9e6e09202a7c6d4464679dff9be5 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/embedding/README.md @@ -0,0 +1,3 @@ +# embedding + +TODO diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/embedding/embedding.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/embedding/embedding.cpp new file mode 100644 index 0000000000000000000000000000000000000000..860f99f672c9cab71f4e91ac28caf1c5bdafab7d --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/embedding/embedding.cpp @@ -0,0 +1,95 @@ +#include "common.h" +#include "llama.h" +#include "build-info.h" + +#include + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +int main(int argc, char ** argv) { + gpt_params params; + + if (gpt_params_parse(argc, argv, params) == false) { + return 1; + } + + params.embedding = true; + + if (params.n_ctx > 2048) { + fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);" + "expect poor results\n", __func__, params.n_ctx); + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + if (params.seed < 0) { + params.seed = time(NULL); + } + + fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + + std::mt19937 rng(params.seed); + if (params.random_prompt) { + params.prompt = gpt_random_prompt(rng); + } + + llama_init_backend(); + + llama_context * ctx; + + // load the model + ctx = llama_init_from_gpt_params(params); + if (ctx == NULL) { + fprintf(stderr, "%s: error: unable to load model\n", __func__); + return 1; + } + + // print system information + { + fprintf(stderr, "\n"); + fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", + params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info()); + } + + int n_past = 0; + + // Add a space in front of the first character to match OG llama tokenizer behavior + params.prompt.insert(0, 1, ' '); + + // tokenize the prompt + auto embd_inp = ::llama_tokenize(ctx, params.prompt, true); + + if (params.verbose_prompt) { + fprintf(stderr, "\n"); + fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); + fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); + for (int i = 0; i < (int) embd_inp.size(); i++) { + fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i])); + } + fprintf(stderr, "\n"); + } + + if (params.embedding){ + if (embd_inp.size() > 0) { + if (llama_eval(ctx, embd_inp.data(), embd_inp.size(), n_past, params.n_threads)) { + fprintf(stderr, "%s : failed to eval\n", __func__); + return 1; + } + } + + const int n_embd = llama_n_embd(ctx); + const auto embeddings = llama_get_embeddings(ctx); + + for (int i = 0; i < n_embd; i++) { + printf("%f ", embeddings[i]); + } + printf("\n"); + } + + llama_print_timings(ctx); + llama_free(ctx); + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/falcon/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..19fca2355ddabb8131a1786dea820ce8ae975886 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon/CMakeLists.txt @@ -0,0 +1,8 @@ +set(TARGET falcon_main) +add_executable(${TARGET} falcon_main.cpp) +target_link_libraries(${TARGET} PRIVATE falcon_common libfalcon ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() + diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/falcon/README.md new file mode 100644 index 0000000000000000000000000000000000000000..149d507a8171da3b6c403cf2cdff97884f1fa3a2 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon/README.md @@ -0,0 +1,292 @@ +# llama.cpp/example/main + +This example program allows you to use various LLaMA language models in an easy and efficient way. It is specifically designed to work with the [llama.cpp](https://github.com/ggerganov/llama.cpp) project, which provides a plain C/C++ implementation with optional 4-bit quantization support for faster, lower memory inference, and is optimized for desktop CPUs. This program can be used to perform various inference tasks with LLaMA models, including generating text based on user-provided prompts and chat-like interactions with reverse prompts. + +## Table of Contents + +1. [Quick Start](#quick-start) +2. [Common Options](#common-options) +3. [Input Prompts](#input-prompts) +4. [Interaction](#interaction) +5. [Context Management](#context-management) +6. [Generation Flags](#generation-flags) +7. [Performance Tuning and Memory Options](#performance-tuning-and-memory-options) +8. [Additional Options](#additional-options) + +## Quick Start + +To get started right away, run the following command, making sure to use the correct path for the model you have: + +#### Unix-based systems (Linux, macOS, etc.): + +```bash +./main -m models/7B/ggml-model.bin --prompt "Once upon a time" +``` + +#### Windows: + +```powershell +main.exe -m models\7B\ggml-model.bin --prompt "Once upon a time" +``` + +For an interactive experience, try this command: + +#### Unix-based systems (Linux, macOS, etc.): + +```bash +./main -m models/7B/ggml-model.bin -n -1 --color -r "User:" --in-prefix " " \ +'User: Hi +AI: Hello. I am an AI chatbot. Would you like to talk? +User: Sure! +AI: What would you like to talk about? +User:' +``` + +#### Windows: + +```powershell +main.exe -m models\7B\ggml-model.bin -n -1 --color -r "User:" --in-prefix " " -e --prompt "User: Hi\nAI: Hello. I am an AI chatbot. Would you like to talk?\nUser: Sure!\nAI: What would you like to talk about?\nUser:" +``` + +The following command generates "infinite" text from a starting prompt (you can use `Ctrl-C` to stop it): + +#### Unix-based systems (Linux, macOS, etc.): + +```bash +./main -m models/7B/ggml-model.bin --ignore-eos -n -1 --random-prompt +``` + +#### Windows: + +```powershell +main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt +``` + +## Common Options + +In this section, we cover the most commonly used options for running the `main` program with the LLaMA models: + +- `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`). +- `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses. +- `-ins, --instruct`: Run the program in instruction mode, which is particularly useful when working with Alpaca models. +- `-n N, --n-predict N`: Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text. +- `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. + +## Input Prompts + +The `main` program provides several ways to interact with the LLaMA models using input prompts: + +- `--prompt PROMPT`: Provide a prompt directly as a command-line option. +- `--file FNAME`: Provide a file containing a prompt or multiple prompts. +- `--interactive-first`: Run the program in interactive mode and wait for input right away. (More on this below.) +- `--random-prompt`: Start with a randomized prompt. + +## Interaction + +The `main` program offers a seamless way to interact with LLaMA models, allowing users to engage in real-time conversations or provide instructions for specific tasks. The interactive mode can be triggered using various options, including `--interactive`, `--interactive-first`, and `--instruct`. + +In interactive mode, users can participate in text generation by injecting their input during the process. Users can press `Ctrl+C` at any time to interject and type their input, followed by pressing `Return` to submit it to the LLaMA model. To submit additional lines without finalizing input, users can end the current line with a backslash (`\`) and continue typing. + +### Interaction Options + +- `-i, --interactive`: Run the program in interactive mode, allowing users to engage in real-time conversations or provide specific instructions to the model. +- `--interactive-first`: Run the program in interactive mode and immediately wait for user input before starting the text generation. +- `-ins, --instruct`: Run the program in instruction mode, which is specifically designed to work with Alpaca models that excel in completing tasks based on user instructions. +- `--color`: Enable colorized output to differentiate visually distinguishing between prompts, user input, and generated text. + +By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the LLaMA models, tailoring the text generation process to your specific needs. + +### Reverse Prompts + +Reverse prompts are a powerful way to create a chat-like experience with a LLaMA model by pausing the text generation when specific text strings are encountered: + +- `-r PROMPT, --reverse-prompt PROMPT`: Specify one or multiple reverse prompts to pause text generation and switch to interactive mode. For example, `-r "User:"` can be used to jump back into the conversation whenever it's the user's turn to speak. This helps create a more interactive and conversational experience. However, the reverse prompt doesn't work when it ends with a space. + +To overcome this limitation, you can use the `--in-prefix` flag to add a space or any other characters after the reverse prompt. + +### In-Prefix + +The `--in-prefix` flag is used to add a prefix to your input, primarily, this is used to insert a space after the reverse prompt. Here's an example of how to use the `--in-prefix` flag in conjunction with the `--reverse-prompt` flag: + +```sh +./main -r "User:" --in-prefix " " +``` + +### In-Suffix + +The `--in-suffix` flag is used to add a suffix after your input. This is useful for adding an "Assistant:" prompt after the user's input. It's added after the new-line character (`\n`) that's automatically added to the end of the user's input. Here's an example of how to use the `--in-suffix` flag in conjunction with the `--reverse-prompt` flag: + +```sh +./main -r "User:" --in-prefix " " --in-suffix "Assistant:" +``` + +### Instruction Mode + +Instruction mode is particularly useful when working with Alpaca models, which are designed to follow user instructions for specific tasks: + +- `-ins, --instruct`: Enable instruction mode to leverage the capabilities of Alpaca models in completing tasks based on user-provided instructions. + +Technical detail: the user's input is internally prefixed with the reverse prompt (or `### Instruction:` as the default), and followed by `### Response:` (except if you just press Return without any input, to keep generating a longer response). + +By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the LLaMA models, tailoring the text generation process to your specific needs. + +## Context Management + +During text generation, LLaMA models have a limited context size, which means they can only consider a certain number of tokens from the input and generated text. When the context fills up, the model resets internally, potentially losing some information from the beginning of the conversation or instructions. Context management options help maintain continuity and coherence in these situations. + +### Context Size + +The `--ctx-size` option allows you to set the size of the prompt context used by the LLaMA models during text generation. A larger context size helps the model to better comprehend and generate responses for longer input or conversations. + +- `-c N, --ctx-size N`: Set the size of the prompt context (default: 512). The LLaMA models were built with a context of 2048, which will yield the best results on longer input/inference. However, increasing the context size beyond 2048 may lead to unpredictable results. + +### Keep Prompt + +The `--keep` option allows users to retain the original prompt when the model runs out of context, ensuring a connection to the initial instruction or conversation topic is maintained. + +- `--keep N`: Specify the number of tokens from the initial prompt to retain when the model resets its internal context. By default, this value is set to 0 (meaning no tokens are kept). Use `-1` to retain all tokens from the initial prompt. + +By utilizing context management options like `--ctx-size` and `--keep`, you can maintain a more coherent and consistent interaction with the LLaMA models, ensuring that the generated text remains relevant to the original prompt or conversation. + +## Generation Flags + +The following options allow you to control the text generation process and fine-tune the diversity, creativity, and quality of the generated text according to your needs. By adjusting these options and experimenting with different combinations of values, you can find the best settings for your specific use case. + +### Number of Tokens to Predict + +- `-n N, --n-predict N`: Set the number of tokens to predict when generating text (default: 128, -1 = infinity). + +The `--n-predict` option controls the number of tokens the model generates in response to the input prompt. By adjusting this value, you can influence the length of the generated text. A higher value will result in longer text, while a lower value will produce shorter text. A value of -1 will cause text to be generated without limit. + +It is important to note that the generated text may be shorter than the specified number of tokens if an End-of-Sequence (EOS) token or a reverse prompt is encountered. In interactive mode text generation will pause and control will be returned to the user. In non-interactive mode, the program will end. In both cases, the text generation may stop before reaching the specified `n-predict` value. If you want the model to keep going without ever producing End-of-Sequence on its own, you can use the `--ignore-eos` parameter. + +### Temperature + +- `--temp N`: Adjust the randomness of the generated text (default: 0.8). + +Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. + +Example usage: `--temp 0.5` + +### Repeat Penalty + +- `--repeat-penalty N`: Control the repetition of token sequences in the generated text (default: 1.1). +- `--repeat-last-n N`: Last n tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size). +- `--no-penalize-nl`: Disable penalization for newline tokens when applying the repeat penalty. + +The `repeat-penalty` option helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. The default value is 1.1. + +The `repeat-last-n` option controls the number of tokens in the history to consider for penalizing repetition. A larger value will look further back in the generated text to prevent repetitions, while a smaller value will only consider recent tokens. A value of 0 disables the penalty, and a value of -1 sets the number of tokens considered equal to the context size (`ctx-size`). + +Use the `--no-penalize-nl` option to disable newline penalization when applying the repeat penalty. This option is particularly useful for generating chat conversations, dialogues, code, poetry, or any text where newline tokens play a significant role in structure and formatting. Disabling newline penalization helps maintain the natural flow and intended formatting in these specific use cases. + +Example usage: `--repeat-penalty 1.15 --repeat-last-n 128 --no-penalize-nl` + +### Top-K Sampling + +- `--top-k N`: Limit the next token selection to the K most probable tokens (default: 40). + +Top-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top-k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text. The default value is 40. + +Example usage: `--top-k 30` + +### Top-P Sampling + +- `--top-p N`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.9). + +Top-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top-p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. The default value is 0.9. + +Example usage: `--top-p 0.95` + +### Tail Free Sampling (TFS) + +- `--tfs N`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled). + +Tail free sampling (TFS) is a text generation technique that aims to reduce the impact of less likely tokens, which may be less relevant, less coherent, or nonsensical, on the output. The method adjusts the logits (token probabilities) by raising them to the power of the parameter z. A higher value of z (e.g., 2.0) will further suppress less likely tokens from the tail of the distribution, while a value of 1.0 disables the effect of TFS. By setting the parameter z, you can control how much the probabilities of less likely tokens are reduced. + +Example usage: `--tfs 2.0` + +### Locally Typical Sampling + +- `--typical N`: Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled). + +Locally typical sampling promotes the generation of contextually coherent and diverse text by sampling tokens that are typical or expected based on the surrounding context. By setting the parameter p between 0 and 1, you can control the balance between producing text that is locally coherent and diverse. A value closer to 1 will promote more contextually coherent tokens, while a value closer to 0 will promote more diverse tokens. A value equal to 1 disables locally typical sampling. + +Example usage: `--typical 0.9` + +### Mirostat Sampling + +- `--mirostat N`: Enable Mirostat sampling, controlling perplexity during text generation (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0). +- `--mirostat-lr N`: Set the Mirostat learning rate, parameter eta (default: 0.1). +- `--mirostat-ent N`: Set the Mirostat target entropy, parameter tau (default: 5.0). + +Mirostat is an algorithm that actively maintains the quality of generated text within a desired range during text generation. It aims to strike a balance between coherence and diversity, avoiding low-quality output caused by excessive repetition (boredom traps) or incoherence (confusion traps). + +The `--mirostat-lr` option sets the Mirostat learning rate (eta). The learning rate influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. The default value is `0.1`. + +The `--mirostat-ent` option sets the Mirostat target entropy (tau), which represents the desired perplexity value for the generated text. Adjusting the target entropy allows you to control the balance between coherence and diversity in the generated text. A lower value will result in more focused and coherent text, while a higher value will lead to more diverse and potentially less coherent text. The default value is `5.0`. + +Example usage: `--mirostat 2 --mirostat-lr 0.05 --mirostat-ent 3.0` + +### Logit Bias + +- `-l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS`: Modify the likelihood of a token appearing in the generated text completion. + +The logit bias option allows you to manually adjust the likelihood of specific tokens appearing in the generated text. By providing a token ID and a positive or negative bias value, you can increase or decrease the probability of that token being generated. + +For example, use `--logit-bias 15043+1` to increase the likelihood of the token 'Hello', or `--logit-bias 15043-1` to decrease its likelihood. Using a value of negative infinity, `--logit-bias 15043-inf` ensures that the token `Hello` is never produced. + +A more practical use case might be to prevent the generation of `\code{begin}` and `\code{end}` by setting the `\` token (29905) to negative infinity with `-l 29905-inf`. (This is due to the prevalence of LaTeX codes that show up in LLaMA model inference.) + +Example usage: `--logit-bias 29905-inf` + +### RNG Seed + +- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed). + +The RNG seed is used to initialize the random number generator that influences the text generation process. By setting a specific seed value, you can obtain consistent and reproducible results across multiple runs with the same input and settings. This can be helpful for testing, debugging, or comparing the effects of different options on the generated text to see when they diverge. If the seed is set to a value less than 0, a random seed will be used, which will result in different outputs on each run. + +## Performance Tuning and Memory Options + +These options help improve the performance and memory usage of the LLaMA models. By adjusting these settings, you can fine-tune the model's behavior to better suit your system's capabilities and achieve optimal performance for your specific use case. + +### Number of Threads + +- `-t N, --threads N`: Set the number of threads to use during computation. For optimal performance, it is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). Using the correct number of threads can greatly improve performance. + +### Mlock + +- `--mlock`: Lock the model in memory, preventing it from being swapped out when memory-mapped. This can improve performance but trades away some of the advantages of memory-mapping by requiring more RAM to run and potentially slowing down load times as the model loads into RAM. + +### No Memory Mapping + +- `--no-mmap`: Do not memory-map the model. By default, models are mapped into memory, which allows the system to load only the necessary parts of the model as needed. However, if the model is larger than your total amount of RAM or if your system is low on available memory, using mmap might increase the risk of pageouts, negatively impacting performance. Disabling mmap results in slower load times but may reduce pageouts if you're not using `--mlock`. Note that if the model is larger than the total amount of RAM, turning off mmap would prevent the model from loading at all. + +### Memory Float 32 + +- `--memory-f32`: Use 32-bit floats instead of 16-bit floats for memory key+value. This doubles the context memory requirement and cached prompt file size but does not appear to increase generation quality in a measurable way. Not recommended. + +### Batch Size + +- `-b N, --batch-size N`: Set the batch size for prompt processing (default: 512). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations. + +### Prompt Caching + +- `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs. **Note**: Restoring a cached prompt does not imply restoring the exact state of the session at the point it was saved. So even when specifying a specific seed, you are not guaranteed to get the same sequence of tokens as the original generation. + +### Quantization + +For information about 4-bit quantization, which can significantly improve performance and reduce memory usage, please refer to llama.cpp's primary [README](../../README.md#prepare-data--run). + +## Additional Options + +These options provide extra functionality and customization when running the LLaMA models: + +- `-h, --help`: Display a help message showing all available options and their default values. This is particularly useful for checking the latest options and default values, as they can change frequently, and the information in this document may become outdated. +- `--verbose-prompt`: Print the prompt before generating text. +- `--mtest`: Test the model's functionality by running a series of tests to ensure it's working properly. +- `-ngl N, --n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance. +- `-mg i, --main-gpu i`: When using multiple GPUs this option controls which GPU is used for small tensors for which the overhead of splitting the computation across all GPUs is not worthwhile. The GPU in question will use slightly more VRAM to store a scratch buffer for temporary results. By default GPU 0 is used. Requires cuBLAS. +- `-ts SPLIT, --tensor-split SPLIT`: When using multiple GPUs this option controls how large tensors should be split across all GPUs. `SPLIT` is a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order. For example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1. By default the data is split in proportion to VRAM but this may not be optimal for performance. Requires cuBLAS. +- `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains. +- `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation. diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon/falcon_main.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/falcon/falcon_main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0aad16cb0c44d4c375e871cacbb4f024dc241923 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon/falcon_main.cpp @@ -0,0 +1,690 @@ +// Defines sigaction on msys: +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include "falcon_common.h" +#include "libfalcon.h" +#include "build-info.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include +#include +#elif defined (_WIN32) +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#include +#endif + +static console_state con_st; +static falcon_context ** g_ctx; + +std::unordered_map special_tokens_ids = { + {">>TITLE<<", 0}, + {">>ABSTRACT<<", 1}, + {">>INTRODUCTION<<", 2}, + {">>SUMMARY<<", 3}, + {">>COMMENT<<", 4}, + {">>ANSWER<<", 5}, + {">>QUESTION<<", 6}, + {">>DOMAIN<<", 7}, + {">>PREFIX<<", 8}, + {">>SUFFIX<<", 9}, + {">>MIDDLE<<", 10}, + {"<|endoftext|>", 11}, +}; + + + + +static bool is_interacting = false; + +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) +void sigint_handler(int signo) { + if (signo == SIGINT) { + if (!is_interacting) { + is_interacting=true; + } else { + console_cleanup(con_st); + printf("\n"); + falcon_print_timings(*g_ctx); + _exit(130); + } + } +} +#endif + +int main(int argc, char ** argv) { + gpt_params params; + + if (gpt_params_parse(argc, argv, params) == false) { + return 1; + } + + // save choice to use color for later + // (note for later: this is a slightly awkward choice) + con_st.use_color = params.use_color; + con_st.multiline_input = params.multiline_input; + console_init(con_st); + atexit([]() { console_cleanup(con_st); }); + + if (params.perplexity) { + printf("\n************\n"); + printf("%s: please use the 'perplexity' tool for perplexity calculations\n", __func__); + printf("************\n\n"); + + return 0; + } + + if (params.embedding) { + printf("\n************\n"); + printf("%s: please use the 'embedding' tool for embedding calculations\n", __func__); + printf("************\n\n"); + + return 0; + } + + if (params.n_ctx > 2048) { + fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);" + "expect poor results\n", __func__, params.n_ctx); + } else if (params.n_ctx < 8) { + fprintf(stderr, "%s: warning: minimum context size is 8, using minimum size.\n", __func__); + params.n_ctx = 8; + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + if (params.seed < 0) { + params.seed = time(NULL); + } + + fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + + std::mt19937 rng(params.seed); + if (params.random_prompt) { + params.prompt = gpt_random_prompt(rng); + } + + llama_init_backend(); + + falcon_context * ctx; + g_ctx = &ctx; + + // load the model and apply lora adapter, if any + ctx = falcon_init_from_gpt_params(params); + if (ctx == NULL) { + fprintf(stderr, "%s: error: unable to load model\n", __func__); + return 1; + } + + // print system information + { + fprintf(stderr, "\n"); + fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", + params.n_threads, std::thread::hardware_concurrency(), falcon_print_system_info()); + } + + // determine the maximum memory usage needed to do inference for the given n_batch and n_predict parameters + // uncomment the "used_mem" line in llama.cpp to see the results + if (params.mem_test) { + { + const std::vector tmp(params.n_batch, falcon_token_bos()); + falcon_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads,params.debug_timings); + } + + { + const std::vector tmp = { 0, }; + falcon_eval(ctx, tmp.data(), tmp.size(), params.n_predict - 1, params.n_threads,params.debug_timings); + } + + falcon_print_timings(ctx); + llama_free(ctx); + + return 0; + } + + // export the cgraph and exit + if (params.export_cgraph) { + falcon_eval_export(ctx, "llama.ggml"); + llama_free(ctx); + + return 0; + } + + std::string path_session = params.path_prompt_cache; + std::vector session_tokens; + + if (!path_session.empty()) { + fprintf(stderr, "%s: attempting to load saved session from '%s'\n", __func__, path_session.c_str()); + + // fopen to check for existing session + FILE * fp = std::fopen(path_session.c_str(), "rb"); + if (fp != NULL) { + std::fclose(fp); + + session_tokens.resize(params.n_ctx); + size_t n_token_count_out = 0; + if (!llama_load_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) { + fprintf(stderr, "%s: error: failed to load session file '%s'\n", __func__, path_session.c_str()); + return 1; + } + session_tokens.resize(n_token_count_out); + llama_set_rng_seed(ctx, params.seed); + + fprintf(stderr, "%s: loaded a session with prompt size of %d tokens\n", __func__, (int) session_tokens.size()); + } else { + fprintf(stderr, "%s: session file does not exist, will create\n", __func__); + } + } + + // tokenize the prompt + std::vector embd_inp; + + if (params.interactive_first || params.instruct || !params.prompt.empty() || session_tokens.empty()) { + // Falcon does not have a dedicated bos token (bos==eos), so don't inject it here + embd_inp = ::falcon_tokenize(ctx, params.prompt, false); + } else { + embd_inp = session_tokens; + } + + const int n_ctx = falcon_n_ctx(ctx); + + if ((int) embd_inp.size() > n_ctx - 4) { + fprintf(stderr, "%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4); + return 1; + } + + // debug message about similarity of saved session, if applicable + size_t n_matching_session_tokens = 0; + if (session_tokens.size()) { + for (llama_token id : session_tokens) { + if (n_matching_session_tokens >= embd_inp.size() || id != embd_inp[n_matching_session_tokens]) { + break; + } + n_matching_session_tokens++; + } + if (params.prompt.empty() && n_matching_session_tokens == embd_inp.size()) { + fprintf(stderr, "%s: using full prompt from session file\n", __func__); + } else if (n_matching_session_tokens >= embd_inp.size()) { + fprintf(stderr, "%s: session file has exact match for prompt!\n", __func__); + } else if (n_matching_session_tokens < (embd_inp.size() / 2)) { + fprintf(stderr, "%s: warning: session file has low similarity to prompt (%zu / %zu tokens); will mostly be reevaluated\n", + __func__, n_matching_session_tokens, embd_inp.size()); + } else { + fprintf(stderr, "%s: session file matches %zu / %zu tokens of prompt\n", + __func__, n_matching_session_tokens, embd_inp.size()); + } + } + + // if we will use the cache for the full prompt without reaching the end of the cache, force + // reevaluation of the last token token to recalculate the cached logits + if (!embd_inp.empty() && n_matching_session_tokens == embd_inp.size() && + session_tokens.size() > embd_inp.size()) { + session_tokens.resize(embd_inp.size() - 1); + } + + // number of tokens to keep when resetting context + if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct) { + params.n_keep = (int)embd_inp.size(); + } + + // prefix & suffix for instruct mode + const auto inp_pfx = ::falcon_tokenize(ctx, "\n\n### Instruction:\n\n", true); + const auto inp_sfx = ::falcon_tokenize(ctx, "\n\n### Response:\n\n", false); + + // in instruct mode, we inject a prefix and a suffix to each input by the user + if (params.instruct) { + params.interactive_first = true; + params.antiprompt.push_back("### Instruction:\n\n"); + } + + // enable interactive mode if interactive start is specified + if (params.interactive_first) { + params.interactive = true; + } + + // determine newline token + auto llama_token_newline = ::falcon_tokenize(ctx, "\n", false); + + if (params.verbose_prompt) { + fprintf(stderr, "\n"); + fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); + fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); + for (int i = 0; i < (int) embd_inp.size(); i++) { + fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], falcon_token_to_str(ctx, embd_inp[i])); + } + if (params.n_keep > 0) { + fprintf(stderr, "%s: static prompt based on n_keep: '", __func__); + for (int i = 0; i < params.n_keep; i++) { + fprintf(stderr, "%s", falcon_token_to_str(ctx, embd_inp[i])); + } + fprintf(stderr, "'\n"); + } + fprintf(stderr, "\n"); + } + + if (params.interactive) { +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + struct sigaction sigint_action; + sigint_action.sa_handler = sigint_handler; + sigemptyset (&sigint_action.sa_mask); + sigint_action.sa_flags = 0; + sigaction(SIGINT, &sigint_action, NULL); +#elif defined (_WIN32) + auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL { + return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false; + }; + SetConsoleCtrlHandler(static_cast(console_ctrl_handler), true); +#endif + + fprintf(stderr, "%s: interactive mode on.\n", __func__); + + if (params.antiprompt.size()) { + for (auto antiprompt : params.antiprompt) { + fprintf(stderr, "Reverse prompt: '%s'\n", antiprompt.c_str()); + } + } + + if (!params.input_prefix.empty()) { + fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str()); + } + + if (!params.input_suffix.empty()) { + fprintf(stderr, "Input suffix: '%s'\n", params.input_suffix.c_str()); + } + } + fprintf(stderr, "sampling: repeat_last_n = %d, repeat_penalty = %f, presence_penalty = %f, frequency_penalty = %f, top_k = %d, tfs_z = %f, top_p = %f, typical_p = %f, temp = %f, mirostat = %d, mirostat_lr = %f, mirostat_ent = %f\n", + params.repeat_last_n, params.repeat_penalty, params.presence_penalty, params.frequency_penalty, params.top_k, params.tfs_z, params.top_p, params.typical_p, params.temp, params.mirostat, params.mirostat_eta, params.mirostat_tau); + fprintf(stderr, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep); + fprintf(stderr, "\n\n"); + + // TODO: replace with ring-buffer + std::vector last_n_tokens(n_ctx); + std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0); + + if (params.interactive) { + const char *control_message; + if (con_st.multiline_input) { + control_message = " - To return control to LLaMa, end your input with '\\'.\n" + " - To return control without starting a new line, end your input with '/'.\n"; + } else { + control_message = " - Press Return to return control to LLaMa.\n" + " - To return control without starting a new line, end your input with '/'.\n" + " - If you want to submit another line, end your input with '\\'.\n"; + } + fprintf(stderr, "== Running in interactive mode. ==\n" +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) + " - Press Ctrl+C to interject at any time.\n" +#endif + "%s\n", control_message); + + is_interacting = params.interactive_first; + } + + bool is_antiprompt = false; + bool input_echo = true; + bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < embd_inp.size(); + + int n_past = 0; + int n_remain = params.n_predict; + int n_consumed = 0; + int n_session_consumed = 0; + + // the first thing we will do is to output the prompt, so set color accordingly + console_set_color(con_st, CONSOLE_COLOR_PROMPT); + + std::vector embd; + + // do one empty run to warm up the model + { + const std::vector tmp = { falcon_token_bos(), }; + falcon_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads,0); + llama_reset_timings(ctx); + } + + while ((n_remain != 0 && !is_antiprompt) || params.interactive) { + // predict + if (embd.size() > 0) { + // Note: n_ctx - 4 here is to match the logic for commandline prompt handling via + // --prompt or --file which uses the same value. + auto max_embd_size = n_ctx - 4; + // Ensure the input doesn't exceed the context size by truncating embd if necessary. + if ((int)embd.size() > max_embd_size) { + auto skipped_tokens = embd.size() - max_embd_size; + console_set_color(con_st, CONSOLE_COLOR_ERROR); + printf("<>", skipped_tokens, skipped_tokens != 1 ? "s" : ""); + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + fflush(stdout); + embd.resize(max_embd_size); + } + + // infinite text generation via context swapping + // if we run out of context: + // - take the n_keep first tokens from the original prompt (via n_past) + // - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches + if (n_past + (int) embd.size() > n_ctx) { + const int n_left = n_past - params.n_keep; + + n_past = params.n_keep; + + // insert n_left/2 tokens at the start of embd from last_n_tokens + embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left/2 - embd.size(), last_n_tokens.end() - embd.size()); + + // stop saving session if we run out of context + path_session.clear(); + + //printf("\n---\n"); + //printf("resetting: '"); + //for (int i = 0; i < (int) embd.size(); i++) { + // printf("%s", falcon_token_to_str(ctx, embd[i])); + //} + //printf("'\n"); + //printf("\n---\n"); + } + + // try to reuse a matching prefix from the loaded session instead of re-eval (via n_past) + if (n_session_consumed < (int) session_tokens.size()) { + size_t i = 0; + for ( ; i < embd.size(); i++) { + if (embd[i] != session_tokens[n_session_consumed]) { + session_tokens.resize(n_session_consumed); + break; + } + + n_past++; + n_session_consumed++; + + if (n_session_consumed >= (int) session_tokens.size()) { + ++i; + break; + } + } + if (i > 0) { + embd.erase(embd.begin(), embd.begin() + i); + } + } + // We have buffers from the warmup run that won't all align with a batched run +#if defined(GGML_USE_CUBLAS) + if (params.n_batch > 1 && embd.size() > 1) + ggml_cuda_pool_free_all(-1); +#endif + // evaluate tokens in batches + // embd is typically prepared beforehand to fit within a batch, but not always + for (int i = 0; i < (int) embd.size(); i += params.n_batch) { + int n_eval = (int) embd.size() - i; + if (n_eval > params.n_batch) { + n_eval = params.n_batch; + } + int debug_timings = params.debug_timings; + if (n_remain == 1 && debug_timings == 2) debug_timings = 3; // we have no access to the last information in eval() + if (falcon_eval(ctx, &embd[i], n_eval, n_past, params.n_threads,debug_timings)) { + fprintf(stderr, "%s : failed to eval\n", __func__); + return 1; + } + n_past += n_eval; + } +#if defined(GGML_USE_CUBLAS) + // frees unused allocations, those during batch processing are of different size than single token eval + if (params.n_batch > 1 && embd.size() > 1) + ggml_cuda_pool_free_all(-1); +#endif + if (embd.size() > 0 && !path_session.empty()) { + session_tokens.insert(session_tokens.end(), embd.begin(), embd.end()); + n_session_consumed = session_tokens.size(); + } + } + + embd.clear(); + + if ((int) embd_inp.size() <= n_consumed && !is_interacting) { + // out of user input, sample next token + const float temp = params.temp; + const int32_t top_k = params.top_k <= 0 ? falcon_n_vocab(ctx) : params.top_k; + const float top_p = params.top_p; + const float tfs_z = params.tfs_z; + const float typical_p = params.typical_p; + const int32_t repeat_last_n = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n; + const float repeat_penalty = params.repeat_penalty; + const float alpha_presence = params.presence_penalty; + const float alpha_frequency = params.frequency_penalty; + const int mirostat = params.mirostat; + const float mirostat_tau = params.mirostat_tau; + const float mirostat_eta = params.mirostat_eta; + const bool penalize_nl = params.penalize_nl; + + // optionally save the session on first sample (for faster prompt loading next time) + if (!path_session.empty() && need_to_save_session && !params.prompt_cache_ro) { + need_to_save_session = false; + llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); + } + + llama_token id = 0; + + { + auto logits = falcon_get_logits(ctx); + auto n_vocab = falcon_n_vocab(ctx); + + // Apply params.logit_bias map + for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) { + logits[it->first] += it->second; + } + + std::vector candidates; + candidates.reserve(n_vocab); + for (llama_token token_id = 0; token_id < n_vocab; token_id++) { + candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f}); + } + + llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false }; + + // Apply penalties + float nl_logit = logits[falcon_token_nl()]; + auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx); + llama_sample_repetition_penalty(ctx, &candidates_p, + last_n_tokens.data() + last_n_tokens.size() - last_n_repeat, + last_n_repeat, repeat_penalty); + llama_sample_frequency_and_presence_penalties(ctx, &candidates_p, + last_n_tokens.data() + last_n_tokens.size() - last_n_repeat, + last_n_repeat, alpha_frequency, alpha_presence); + if (!penalize_nl) { + logits[falcon_token_nl()] = nl_logit; + } + + if (temp <= 0) { + // Greedy sampling + id = llama_sample_token_greedy(ctx, &candidates_p); + } else { + if (mirostat == 1) { + static float mirostat_mu = 2.0f * mirostat_tau; + const int mirostat_m = 100; + llama_sample_temperature(ctx, &candidates_p, temp); + id = llama_sample_token_mirostat(ctx, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu); + } else if (mirostat == 2) { + static float mirostat_mu = 2.0f * mirostat_tau; + llama_sample_temperature(ctx, &candidates_p, temp); + id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu); + } else { + // Temperature sampling + llama_sample_top_k(ctx, &candidates_p, top_k, 1); + llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1); + llama_sample_typical(ctx, &candidates_p, typical_p, 1); + llama_sample_top_p(ctx, &candidates_p, top_p, 1); + llama_sample_temperature(ctx, &candidates_p, temp); + id = llama_sample_token(ctx, &candidates_p); + } + } + // printf("`%d`", candidates_p.size); + + last_n_tokens.erase(last_n_tokens.begin()); + last_n_tokens.push_back(id); + } + + // replace end of text token with newline token when in interactive mode + if (id == falcon_token_eos() && params.interactive && !params.instruct) { + id = llama_token_newline.front(); + if (params.antiprompt.size() != 0) { + // tokenize and inject first reverse prompt + const auto first_antiprompt = ::falcon_tokenize(ctx, params.antiprompt.front(), false); + embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); + } + } + + // add it to the context + embd.push_back(id); + + // echo this to console + input_echo = true; + + // decrement remaining sampling budget + --n_remain; + } else { + // some user input remains from prompt or interaction, forward it to processing + while ((int) embd_inp.size() > n_consumed) { + embd.push_back(embd_inp[n_consumed]); + last_n_tokens.erase(last_n_tokens.begin()); + last_n_tokens.push_back(embd_inp[n_consumed]); + ++n_consumed; + if ((int) embd.size() >= params.n_batch) { + break; + } + } + } + + // display text + if (input_echo) { + for (auto id : embd) { + printf("%s", falcon_token_to_str(ctx, id)); + } + fflush(stdout); + } + // reset color to default if we there is no pending user input + if (input_echo && (int)embd_inp.size() == n_consumed) { + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + } + + // if not currently processing queued inputs; + if ((int) embd_inp.size() <= n_consumed) { + + // check for reverse prompt + if (params.antiprompt.size()) { + std::string last_output; + for (auto id : last_n_tokens) { + last_output += falcon_token_to_str(ctx, id); + } + + is_antiprompt = false; + // Check if each of the reverse prompts appears at the end of the output. + // If we're not running interactively, the reverse prompt might be tokenized with some following characters + // so we'll compensate for that by widening the search window a bit. + for (std::string & antiprompt : params.antiprompt) { + size_t extra_padding = params.interactive ? 0 : 2; + size_t search_start_pos = last_output.length() > static_cast(antiprompt.length() + extra_padding) + ? last_output.length() - static_cast(antiprompt.length() + extra_padding) + : 0; + + if (last_output.find(antiprompt.c_str(), search_start_pos) != std::string::npos) { + if (params.interactive) { + is_interacting = true; + console_set_color(con_st, CONSOLE_COLOR_USER_INPUT); + } + is_antiprompt = true; + fflush(stdout); + break; + } + } + } + + if (n_past > 0 && is_interacting) { + if (params.instruct) { + printf("\n> "); + } + + std::string buffer; + if (!params.input_prefix.empty()) { + buffer += params.input_prefix; + printf("%s", buffer.c_str()); + } + + std::string line; + bool another_line = true; + do { + another_line = console_readline(con_st, line); + buffer += line; + } while (another_line); + + // done taking input, reset color + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + + // Add tokens to embd only if the input buffer is non-empty + // Entering a empty line lets the user pass control back + if (buffer.length() > 1) { + // append input suffix if any + if (!params.input_suffix.empty()) { + buffer += params.input_suffix; + printf("%s", params.input_suffix.c_str()); + } + + // instruct mode: insert instruction prefix + if (params.instruct && !is_antiprompt) { + n_consumed = embd_inp.size(); + embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end()); + } + + auto line_inp = ::falcon_tokenize(ctx, buffer, false); + embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); + + // instruct mode: insert response suffix + if (params.instruct) { + embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); + } + + n_remain -= line_inp.size(); + } + + input_echo = false; // do not echo this again + } + + if (n_past > 0) { + is_interacting = false; + } + } + + // end of text token + if (!embd.empty() && embd.back() == falcon_token_eos()) { + if (params.instruct) { + is_interacting = true; + } else { + fprintf(stderr, " [end of text]\n"); + break; + } + } + + // In interactive mode, respect the maximum number of tokens and drop back to user input when reached. + if (params.interactive && n_remain <= 0 && params.n_predict != -1) { + n_remain = params.n_predict; + is_interacting = true; + } + } + + if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) { + fprintf(stderr, "\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str()); + llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); + } + + falcon_print_timings(ctx); + llama_free(ctx); + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon_common.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_common.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e323dbce980f8d760f1d51c0dadb1ba64d0a901 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_common.cpp @@ -0,0 +1,938 @@ +#include "falcon_common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__APPLE__) && defined(__MACH__) +#include +#include +#endif + +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#include +#include +#else +#include +#include +#include +#endif + +int32_t get_num_physical_cores() { +#ifdef __linux__ + // enumerate the set of thread siblings, num entries is num cores + std::unordered_set siblings; + for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) { + std::ifstream thread_siblings("/sys/devices/system/cpu" + + std::to_string(cpu) + "/topology/thread_siblings"); + if (!thread_siblings.is_open()) { + break; // no more cpus + } + std::string line; + if (std::getline(thread_siblings, line)) { + siblings.insert(line); + } + } + if (siblings.size() > 0) { + return static_cast(siblings.size()); + } +#elif defined(__APPLE__) && defined(__MACH__) + int32_t num_physical_cores; + size_t len = sizeof(num_physical_cores); + int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return num_physical_cores; + } + result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return num_physical_cores; + } +#elif defined(_WIN32) + //TODO: Implement +#endif + unsigned int n_threads = std::thread::hardware_concurrency(); + return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4; +} + +void process_escapes(std::string& input) { + std::size_t input_len = input.length(); + std::size_t output_idx = 0; + + for (std::size_t input_idx = 0; input_idx < input_len; ++input_idx) { + if (input[input_idx] == '\\' && input_idx + 1 < input_len) { + switch (input[++input_idx]) { + case 'n': input[output_idx++] = '\n'; break; + case 'r': input[output_idx++] = '\r'; break; + case 't': input[output_idx++] = '\t'; break; + case '\'': input[output_idx++] = '\''; break; + case '\"': input[output_idx++] = '\"'; break; + case '\\': input[output_idx++] = '\\'; break; + default: input[output_idx++] = '\\'; + input[output_idx++] = input[input_idx]; break; + } + } else { + input[output_idx++] = input[input_idx]; + } + } + + input.resize(output_idx); +} + +bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { + bool invalid_param = false; + bool escape_prompt = false; + std::string arg; + gpt_params default_params; + const std::string arg_prefix = "--"; + + for (int i = 1; i < argc; i++) { + arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); + } + + if (arg == "-s" || arg == "--seed") { +#if defined(GGML_USE_CUBLAS) + fprintf(stderr, "WARNING: when using cuBLAS generation results are NOT guaranteed to be reproducible.\n"); +#endif + if (++i >= argc) { + invalid_param = true; + break; + } + params.seed = std::stoi(argv[i]); + } else if (arg == "-t" || arg == "--threads") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_threads = std::stoi(argv[i]); + } else if (arg == "-p" || arg == "--prompt") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.prompt = argv[i]; + } else if (arg == "-e") { + escape_prompt = true; + } else if (arg == "--prompt-cache") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.path_prompt_cache = argv[i]; + } else if (arg == "--prompt-cache-all") { + params.prompt_cache_all = true; + } else if (arg == "--prompt-cache-ro") { + params.prompt_cache_ro = true; + } else if (arg == "-f" || arg == "--file") { + if (++i >= argc) { + invalid_param = true; + break; + } + std::ifstream file(argv[i]); + if (!file) { + fprintf(stderr, "error: failed to open file '%s'\n", argv[i]); + invalid_param = true; + break; + } + std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); + if (params.prompt.back() == '\n') { + params.prompt.pop_back(); + } + } else if (arg == "-n" || arg == "--n-predict") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_predict = std::stoi(argv[i]); + } else if (arg == "--top-k") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.top_k = std::stoi(argv[i]); + } else if (arg == "-c" || arg == "--ctx-size") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_ctx = std::stoi(argv[i]); + } else if (arg == "--memory-f32") { + params.memory_f16 = false; + } else if (arg == "--top-p") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.top_p = std::stof(argv[i]); + } else if (arg == "--temp") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.temp = std::stof(argv[i]); + } else if (arg == "--tfs") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.tfs_z = std::stof(argv[i]); + } else if (arg == "--typical") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.typical_p = std::stof(argv[i]); + } else if (arg == "--repeat-last-n") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.repeat_last_n = std::stoi(argv[i]); + } else if (arg == "--repeat-penalty") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.repeat_penalty = std::stof(argv[i]); + } else if (arg == "--frequency-penalty") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.frequency_penalty = std::stof(argv[i]); + } else if (arg == "--presence-penalty") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.presence_penalty = std::stof(argv[i]); + } else if (arg == "--mirostat") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.mirostat = std::stoi(argv[i]); + } else if (arg == "--mirostat-lr") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.mirostat_eta = std::stof(argv[i]); + } else if (arg == "--mirostat-ent") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.mirostat_tau = std::stof(argv[i]); + } else if (arg == "-b" || arg == "--batch-size") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_batch = std::stoi(argv[i]); + params.n_batch = std::min(512, params.n_batch); + } else if (arg == "--keep") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_keep = std::stoi(argv[i]); + } else if (arg == "-m" || arg == "--model") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.model = argv[i]; + } else if (arg == "-a" || arg == "--alias") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.model_alias = argv[i]; + } else if (arg == "--lora") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.lora_adapter = argv[i]; + params.use_mmap = false; + } else if (arg == "--lora-base") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.lora_base = argv[i]; + } else if (arg == "-i" || arg == "--interactive") { + params.interactive = true; + } else if (arg == "--embedding") { + params.embedding = true; + } else if (arg == "--interactive-first") { + params.interactive_first = true; + } else if (arg == "-ins" || arg == "--instruct") { + params.instruct = true; + } else if (arg == "--multiline-input") { + params.multiline_input = true; + } else if (arg == "--color") { + params.use_color = true; + } else if (arg == "--mlock") { + params.use_mlock = true; + } else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") { + if (++i >= argc) { + invalid_param = true; + break; + } +#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD + params.n_gpu_layers = std::stoi(argv[i]); +#else + fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); +#endif + } else if (arg == "--main-gpu" || arg == "-mg") { + if (++i >= argc) { + invalid_param = true; + break; + } +#ifdef GGML_USE_CUBLAS + params.main_gpu = std::stoi(argv[i]); +#else + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.\n"); +#endif + } else if (arg == "--tensor-split" || arg == "-ts") { + if (++i >= argc) { + invalid_param = true; + break; + } +#ifdef GGML_USE_CUBLAS + std::string arg_next = argv[i]; + + // split string by , and / + const std::regex regex{R"([,/]+)"}; + std::sregex_token_iterator it{arg_next.begin(), arg_next.end(), regex, -1}; + std::vector split_arg{it, {}}; + GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES); + + for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) { + if (i < split_arg.size()) { + params.tensor_split[i] = std::stof(split_arg[i]); + } else { + params.tensor_split[i] = 0.0f; + } + } +#else + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n"); +#endif // GGML_USE_CUBLAS + } else if (arg == "--no-mmap") { + params.use_mmap = false; + } else if (arg == "--mtest") { + params.mem_test = true; + } else if (arg == "--export") { + params.export_cgraph = true; + } else if (arg == "--debug-timings" || arg == "-dt") { + if (++i >= argc) { + params.debug_timings = 1; + } else + params.debug_timings = std::stoi(argv[i]); + } else if (arg == "-r" || arg == "--reverse-prompt") { + } else if (arg == "--verbose-prompt") { + params.verbose_prompt = true; + } else if (arg == "-r" || arg == "--reverse-prompt") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.antiprompt.push_back(argv[i]); + } else if (arg == "--perplexity") { + params.perplexity = true; + } else if (arg == "--ignore-eos") { + params.logit_bias[falcon_token_eos()] = -INFINITY; + } else if (arg == "--no-penalize-nl") { + params.penalize_nl = false; + } else if (arg == "-l" || arg == "--logit-bias") { + if (++i >= argc) { + invalid_param = true; + break; + } + std::stringstream ss(argv[i]); + llama_token key; + char sign; + std::string value_str; + try { + if (ss >> key && ss >> sign && std::getline(ss, value_str) && (sign == '+' || sign == '-')) { + params.logit_bias[key] = std::stof(value_str) * ((sign == '-') ? -1.0f : 1.0f); + } else { + throw std::exception(); + } + } catch (const std::exception &e) { + invalid_param = true; + break; + } + } else if (arg == "-h" || arg == "--help") { + gpt_print_usage(argc, argv, default_params); + exit(0); + } else if (arg == "--random-prompt") { + params.random_prompt = true; + } else if (arg == "--in-prefix") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.input_prefix = argv[i]; + } else if (arg == "--in-suffix") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.input_suffix = argv[i]; + } else { + fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); + gpt_print_usage(argc, argv, default_params); + exit(1); + } + } + if (invalid_param) { + fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str()); + gpt_print_usage(argc, argv, default_params); + exit(1); + } + if (params.prompt_cache_all && + (params.interactive || params.interactive_first || + params.instruct)) { + fprintf(stderr, "error: --prompt-cache-all not supported in interactive mode yet\n"); + gpt_print_usage(argc, argv, default_params); + exit(1); + } + if (escape_prompt) { + process_escapes(params.prompt); + } + + return true; +} + +void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { + fprintf(stderr, "usage: %s [options]\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "options:\n"); + fprintf(stderr, " -h, --help show this help message and exit\n"); + fprintf(stderr, " -i, --interactive run in interactive mode\n"); + fprintf(stderr, " --interactive-first run in interactive mode and wait for input right away\n"); + fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n"); + fprintf(stderr, " --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n"); + fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n"); + fprintf(stderr, " halt generation at PROMPT, return control in interactive mode\n"); + fprintf(stderr, " (can be specified more than once for multiple prompts).\n"); + fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n"); + fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n"); + fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); + fprintf(stderr, " -p PROMPT, --prompt PROMPT\n"); + fprintf(stderr, " prompt to start generation with (default: empty)\n"); + fprintf(stderr, " -e process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n"); + fprintf(stderr, " --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n"); + fprintf(stderr, " --prompt-cache-all if specified, saves user input and generations to cache as well.\n"); + fprintf(stderr, " not supported with --interactive or other interactive options\n"); + fprintf(stderr, " --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n"); + fprintf(stderr, " --random-prompt start with a randomized prompt.\n"); + fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n"); + fprintf(stderr, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n"); + fprintf(stderr, " -f FNAME, --file FNAME\n"); + fprintf(stderr, " prompt file to start generation.\n"); + fprintf(stderr, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict); + fprintf(stderr, " --top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); + fprintf(stderr, " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); + fprintf(stderr, " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); + fprintf(stderr, " --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); + fprintf(stderr, " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); + fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); + fprintf(stderr, " --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); + fprintf(stderr, " --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); + fprintf(stderr, " --mirostat N use Mirostat sampling.\n"); + fprintf(stderr, " Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"); + fprintf(stderr, " (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); + fprintf(stderr, " --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); + fprintf(stderr, " --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); + fprintf(stderr, " -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n"); + fprintf(stderr, " modifies the likelihood of token appearing in the completion,\n"); + fprintf(stderr, " i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"); + fprintf(stderr, " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n"); + fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx); + fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); + fprintf(stderr, " --no-penalize-nl do not penalize newline token\n"); + fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n"); + fprintf(stderr, " not recommended: doubles context memory required and no measurable increase in quality\n"); + fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp); + fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); + fprintf(stderr, " --perplexity compute perplexity over the prompt\n"); + fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); + if (llama_mlock_supported()) { + fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n"); + } + if (llama_mmap_supported()) { + fprintf(stderr, " --no-mmap Do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); + } +#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD + fprintf(stderr, " -ngl N, --n-gpu-layers N\n"); + fprintf(stderr, " number of layers to store in VRAM\n"); + fprintf(stderr, " -ts SPLIT --tensor-split SPLIT\n"); + fprintf(stderr, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); + fprintf(stderr, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" ); +#endif + fprintf(stderr, " --mtest compute maximum memory usage\n"); + fprintf(stderr, " --export export the computation graph to 'llama.ggml'\n"); + fprintf(stderr, " --verbose-prompt print prompt before generation\n"); + fprintf(stderr, " -dt, --debug-timings print GGML_PERF debug output (requires GGML_PERF=1 for timings)\n"); + fprintf(stderr, " 1 = print first layer, 2 = print first and last layer, 3+ = all layers\n"); + fprintf(stderr, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n"); + fprintf(stderr, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n"); + fprintf(stderr, " -m FNAME, --model FNAME\n"); + fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); + fprintf(stderr, "\n"); +} + +std::string gpt_random_prompt(std::mt19937 & rng) { + const int r = rng() % 10; + switch (r) { + case 0: return "So"; + case 1: return "Once upon a time"; + case 2: return "When"; + case 3: return "The"; + case 4: return "After"; + case 5: return "If"; + case 6: return "import"; + case 7: return "He"; + case 8: return "She"; + case 9: return "They"; + default: return "To"; + } + + return "The"; +} + +// TODO: not great allocating this every time +std::vector falcon_tokenize(struct falcon_context * ctx, const std::string & text, bool add_bos) { + // initialize to prompt numer of chars, since n_tokens <= n_prompt_chars + std::vector res(text.size() + (int) add_bos); + const int n = falcon_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); + assert(n >= 0); + res.resize(n); + + return res; +} + +struct falcon_context * falcon_init_from_gpt_params(const gpt_params & params) { + auto lparams = falcon_context_default_params(); + + lparams.n_ctx = params.n_ctx; + lparams.n_batch = params.n_batch; + lparams.n_gpu_layers = params.n_gpu_layers; + lparams.main_gpu = params.main_gpu; + memcpy(lparams.tensor_split, params.tensor_split, LLAMA_MAX_DEVICES*sizeof(float)); + lparams.seed = params.seed; + lparams.f16_kv = false; //params.memory_f16; // TODO? unsupported because ggml_repeat2 currently only implemented for f32 + lparams.use_mmap = params.use_mmap; + lparams.use_mlock = params.use_mlock; + lparams.logits_all = params.perplexity; + lparams.embedding = params.embedding; + + falcon_context * lctx = falcon_init_from_file(params.model.c_str(), lparams); + + if (lctx == NULL) { + fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str()); + return NULL; + } + + // if (!params.lora_adapter.empty()) { + // int err = llama_apply_lora_from_file(lctx, + // params.lora_adapter.c_str(), + // params.lora_base.empty() ? NULL : params.lora_base.c_str(), + // params.n_threads); + // if (err != 0) { + // fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__); + // return NULL; + // } + // } + + return lctx; +} + +void console_init(console_state & con_st) { +#if defined(_WIN32) + // Windows-specific console initialization + DWORD dwMode = 0; + con_st.hConsole = GetStdHandle(STD_OUTPUT_HANDLE); + if (con_st.hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(con_st.hConsole, &dwMode)) { + con_st.hConsole = GetStdHandle(STD_ERROR_HANDLE); + if (con_st.hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(con_st.hConsole, &dwMode))) { + con_st.hConsole = NULL; + } + } + if (con_st.hConsole) { + // Enable ANSI colors on Windows 10+ + if (con_st.use_color && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) { + SetConsoleMode(con_st.hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING); + } + // Set console output codepage to UTF8 + SetConsoleOutputCP(CP_UTF8); + } + HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE); + if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) { + // Set console input codepage to UTF16 + _setmode(_fileno(stdin), _O_WTEXT); + + // Turn off ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT) + dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT); + SetConsoleMode(hConIn, dwMode); + } +#else + // POSIX-specific console initialization + struct termios new_termios; + tcgetattr(STDIN_FILENO, &con_st.prev_state); + new_termios = con_st.prev_state; + new_termios.c_lflag &= ~(ICANON | ECHO); + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + tcsetattr(STDIN_FILENO, TCSANOW, &new_termios); + + con_st.tty = fopen("/dev/tty", "w+"); + if (con_st.tty != nullptr) { + con_st.out = con_st.tty; + } + + setlocale(LC_ALL, ""); +#endif +} + +void console_cleanup(console_state & con_st) { + // Reset console color + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + +#if !defined(_WIN32) + if (con_st.tty != nullptr) { + con_st.out = stdout; + fclose(con_st.tty); + con_st.tty = nullptr; + } + // Restore the terminal settings on POSIX systems + tcsetattr(STDIN_FILENO, TCSANOW, &con_st.prev_state); +#endif +} + +/* Keep track of current color of output, and emit ANSI code if it changes. */ +void console_set_color(console_state & con_st, console_color_t color) { + if (con_st.use_color && con_st.color != color) { + fflush(stdout); + switch(color) { + case CONSOLE_COLOR_DEFAULT: + fprintf(con_st.out, ANSI_COLOR_RESET); + break; + case CONSOLE_COLOR_PROMPT: + fprintf(con_st.out, ANSI_COLOR_YELLOW); + break; + case CONSOLE_COLOR_USER_INPUT: + fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN); + break; + case CONSOLE_COLOR_ERROR: + fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED); + break; + } + con_st.color = color; + fflush(con_st.out); + } +} + +char32_t getchar32() { +#if defined(_WIN32) + HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE); + wchar_t high_surrogate = 0; + + while (true) { + INPUT_RECORD record; + DWORD count; + if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) { + return WEOF; + } + + if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) { + wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar; + if (wc == 0) { + continue; + } + + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + high_surrogate = wc; + continue; + } else if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate + if (high_surrogate != 0) { // Check if we have a high surrogate + return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000; + } + } + + high_surrogate = 0; // Reset the high surrogate + return static_cast(wc); + } + } +#else + wchar_t wc = getwchar(); + if (static_cast(wc) == WEOF) { + return WEOF; + } + +#if WCHAR_MAX == 0xFFFF + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + wchar_t low_surrogate = getwchar(); + if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate + return (static_cast(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000; + } + } + if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair + return 0xFFFD; // Return the replacement character U+FFFD + } +#endif + + return static_cast(wc); +#endif +} + +void pop_cursor(console_state & con_st) { +#if defined(_WIN32) + if (con_st.hConsole != NULL) { + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo); + + COORD newCursorPosition = bufferInfo.dwCursorPosition; + if (newCursorPosition.X == 0) { + newCursorPosition.X = bufferInfo.dwSize.X - 1; + newCursorPosition.Y -= 1; + } else { + newCursorPosition.X -= 1; + } + + SetConsoleCursorPosition(con_st.hConsole, newCursorPosition); + return; + } +#endif + putc('\b', con_st.out); +} + +int estimateWidth(char32_t codepoint) { +#if defined(_WIN32) + return 1; +#else + return wcwidth(codepoint); +#endif +} + +int put_codepoint(console_state & con_st, const char* utf8_codepoint, size_t length, int expectedWidth) { +#if defined(_WIN32) + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + if (!GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo)) { + // go with the default + return expectedWidth; + } + COORD initialPosition = bufferInfo.dwCursorPosition; + DWORD nNumberOfChars = length; + WriteConsole(con_st.hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL); + + CONSOLE_SCREEN_BUFFER_INFO newBufferInfo; + GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo); + + // Figure out our real position if we're in the last column + if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) { + DWORD nNumberOfChars; + WriteConsole(con_st.hConsole, &" \b", 2, &nNumberOfChars, NULL); + GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo); + } + + int width = newBufferInfo.dwCursorPosition.X - initialPosition.X; + if (width < 0) { + width += newBufferInfo.dwSize.X; + } + return width; +#else + // we can trust expectedWidth if we've got one + if (expectedWidth >= 0 || con_st.tty == nullptr) { + fwrite(utf8_codepoint, length, 1, con_st.out); + return expectedWidth; + } + + fputs("\033[6n", con_st.tty); // Query cursor position + int x1, x2, y1, y2; + int results = 0; + results = fscanf(con_st.tty, "\033[%d;%dR", &y1, &x1); + + fwrite(utf8_codepoint, length, 1, con_st.tty); + + fputs("\033[6n", con_st.tty); // Query cursor position + results += fscanf(con_st.tty, "\033[%d;%dR", &y2, &x2); + + if (results != 4) { + return expectedWidth; + } + + int width = x2 - x1; + if (width < 0) { + // Calculate the width considering text wrapping + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + width += w.ws_col; + } + return width; +#endif +} + +void replace_last(console_state & con_st, char ch) { +#if defined(_WIN32) + pop_cursor(con_st); + put_codepoint(con_st, &ch, 1, 1); +#else + fprintf(con_st.out, "\b%c", ch); +#endif +} + +void append_utf8(char32_t ch, std::string & out) { + if (ch <= 0x7F) { + out.push_back(static_cast(ch)); + } else if (ch <= 0x7FF) { + out.push_back(static_cast(0xC0 | ((ch >> 6) & 0x1F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0xFFFF) { + out.push_back(static_cast(0xE0 | ((ch >> 12) & 0x0F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0x10FFFF) { + out.push_back(static_cast(0xF0 | ((ch >> 18) & 0x07))); + out.push_back(static_cast(0x80 | ((ch >> 12) & 0x3F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else { + // Invalid Unicode code point + } +} + +// Helper function to remove the last UTF-8 character from a string +void pop_back_utf8_char(std::string & line) { + if (line.empty()) { + return; + } + + size_t pos = line.length() - 1; + + // Find the start of the last UTF-8 character (checking up to 4 bytes back) + for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) { + if ((line[pos] & 0xC0) != 0x80) break; // Found the start of the character + } + line.erase(pos); +} + +bool console_readline(console_state & con_st, std::string & line) { + console_set_color(con_st, CONSOLE_COLOR_USER_INPUT); + if (con_st.out != stdout) { + fflush(stdout); + } + + line.clear(); + std::vector widths; + bool is_special_char = false; + bool end_of_stream = false; + + char32_t input_char; + while (true) { + fflush(con_st.out); // Ensure all output is displayed before waiting for input + input_char = getchar32(); + + if (input_char == '\r' || input_char == '\n') { + break; + } + + if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) { + end_of_stream = true; + break; + } + + if (is_special_char) { + console_set_color(con_st, CONSOLE_COLOR_USER_INPUT); + replace_last(con_st, line.back()); + is_special_char = false; + } + + if (input_char == '\033') { // Escape sequence + char32_t code = getchar32(); + if (code == '[' || code == 0x1B) { + // Discard the rest of the escape sequence + while ((code = getchar32()) != (char32_t) WEOF) { + if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') { + break; + } + } + } + } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace + if (!widths.empty()) { + int count; + do { + count = widths.back(); + widths.pop_back(); + // Move cursor back, print space, and move cursor back again + for (int i = 0; i < count; i++) { + replace_last(con_st, ' '); + pop_cursor(con_st); + } + pop_back_utf8_char(line); + } while (count == 0 && !widths.empty()); + } + } else { + int offset = line.length(); + append_utf8(input_char, line); + int width = put_codepoint(con_st, line.c_str() + offset, line.length() - offset, estimateWidth(input_char)); + if (width < 0) { + width = 0; + } + widths.push_back(width); + } + + if (!line.empty() && (line.back() == '\\' || line.back() == '/')) { + console_set_color(con_st, CONSOLE_COLOR_PROMPT); + replace_last(con_st, line.back()); + is_special_char = true; + } + } + + bool has_more = con_st.multiline_input; + if (is_special_char) { + replace_last(con_st, ' '); + pop_cursor(con_st); + + char last = line.back(); + line.pop_back(); + if (last == '\\') { + line += '\n'; + fputc('\n', con_st.out); + has_more = !has_more; + } else { + // llama will just eat the single space, it won't act as a space + if (line.length() == 1 && line.back() == ' ') { + line.clear(); + pop_cursor(con_st); + } + has_more = false; + } + } else { + if (end_of_stream) { + has_more = false; + } else { + line += '\n'; + fputc('\n', con_st.out); + } + } + + fflush(con_st.out); + return has_more; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon_common.h b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_common.h new file mode 100644 index 0000000000000000000000000000000000000000..312e2827dc946cad9718bdf9ad94b9defb5d545c --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_common.h @@ -0,0 +1,137 @@ +// Various helper functions and utilities + +#pragma once + +#include "libfalcon.h" + +#include +#include +#include +#include +#include + +#if !defined (_WIN32) +#include +#include +#endif + +// +// CLI argument parsing +// +int32_t get_num_physical_cores(); + +struct gpt_params { + int32_t seed = -1; // RNG seed + int32_t n_threads = get_num_physical_cores(); + int32_t n_predict = -1; // new tokens to predict + int32_t n_ctx = 512; // context size + int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_gpu_layers = 0; // number of layers to store in VRAM + int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors + float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs + + // sampling parameters + std::unordered_map logit_bias; // logit bias for specific tokens + int32_t top_k = 40; // <= 0 to use vocab size + float top_p = 0.95f; // 1.0 = disabled + float tfs_z = 1.00f; // 1.0 = disabled + float typical_p = 1.00f; // 1.0 = disabled + float temp = 0.80f; // 1.0 = disabled + float repeat_penalty = 1.10f; // 1.0 = disabled + int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + float frequency_penalty = 0.00f; // 0.0 = disabled + float presence_penalty = 0.00f; // 0.0 = disabled + int mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float mirostat_tau = 5.00f; // target entropy + float mirostat_eta = 0.10f; // learning rate + + std::string model = "models/7B/ggml-model.bin"; // model path + std::string model_alias = "unknown"; // model alias + std::string prompt = ""; + std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state + std::string input_prefix = ""; // string to prefix user inputs with + std::string input_suffix = ""; // string to suffix user inputs with + std::vector antiprompt; // string upon seeing which more user input is prompted + + std::string lora_adapter = ""; // lora adapter path + std::string lora_base = ""; // base model path for the lora adapter + + bool memory_f16 = true; // use f16 instead of f32 for memory kv + bool random_prompt = false; // do not randomize prompt if none provided + bool use_color = false; // use color to distinguish generations and inputs + bool interactive = false; // interactive mode + bool prompt_cache_all = false; // save user input and generations to prompt cache + bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it + + bool embedding = false; // get only sentence embedding + bool interactive_first = false; // wait for user input immediately + bool multiline_input = false; // reverse the usage of `\` + + bool instruct = false; // instruction mode (used for Alpaca models) + bool penalize_nl = true; // consider newlines as a repeatable token + bool perplexity = false; // compute perplexity over the prompt + bool use_mmap = true; // use mmap for faster loads + bool use_mlock = false; // use mlock to keep model in memory + bool mem_test = false; // compute maximum memory usage + bool export_cgraph = false; // export the computation graph + bool verbose_prompt = false; // print prompt tokens before generation + int debug_timings = 0; // print timings (required for GGML_PERF=1) +}; + +bool gpt_params_parse(int argc, char ** argv, gpt_params & params); + +void gpt_print_usage(int argc, char ** argv, const gpt_params & params); + +std::string gpt_random_prompt(std::mt19937 & rng); + +// +// Vocab utils +// + +std::vector falcon_tokenize(struct falcon_context * ctx, const std::string & text, bool add_bos); + +// +// Model utils +// + +struct falcon_context * falcon_init_from_gpt_params(const gpt_params & params); + +// +// Console utils +// + +#define ANSI_COLOR_RED "\x1b[31m" +#define ANSI_COLOR_GREEN "\x1b[32m" +#define ANSI_COLOR_YELLOW "\x1b[33m" +#define ANSI_COLOR_BLUE "\x1b[34m" +#define ANSI_COLOR_MAGENTA "\x1b[35m" +#define ANSI_COLOR_CYAN "\x1b[36m" +#define ANSI_COLOR_RESET "\x1b[0m" +#define ANSI_BOLD "\x1b[1m" + +enum console_color_t { + CONSOLE_COLOR_DEFAULT=0, + CONSOLE_COLOR_PROMPT, + CONSOLE_COLOR_USER_INPUT, + CONSOLE_COLOR_ERROR +}; + +struct console_state { + bool multiline_input = false; + bool use_color = false; + console_color_t color = CONSOLE_COLOR_DEFAULT; + + FILE* out = stdout; +#if defined (_WIN32) + void* hConsole; +#else + FILE* tty = nullptr; + termios prev_state; +#endif +}; + +void console_init(console_state & con_st); +void console_cleanup(console_state & con_st); +void console_set_color(console_state & con_st, console_color_t color); +bool console_readline(console_state & con_st, std::string & line); diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e4eb989b47a1b72e0edc7289dd309a4e566aabd --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TARGET falcon_perplexity) +add_executable(${TARGET} falcon_perplexity.cpp) +target_link_libraries(${TARGET} PRIVATE falcon_common libfalcon ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eacfb17c67fb2e8a476d9bc58b84a34c9942b24c --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/README.md @@ -0,0 +1,3 @@ +# perplexity + +TODO diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/falcon_perplexity.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/falcon_perplexity.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5b55bf47216dc62bcaee63ac80c0f4a9c8ff5213 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_perplexity/falcon_perplexity.cpp @@ -0,0 +1,174 @@ +#include "falcon_common.h" +#include "libfalcon.h" +#include "build-info.h" + +#include +#include + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +std::vector softmax(const std::vector& logits) { + std::vector probs(logits.size()); + float max_logit = logits[0]; + for (float v : logits) max_logit = std::max(max_logit, v); + double sum_exp = 0.0; + for (size_t i = 0; i < logits.size(); i++) { + // Subtract the maximum logit value from the current logit value for numerical stability + const float logit = logits[i] - max_logit; + const float exp_logit = expf(logit); + sum_exp += exp_logit; + probs[i] = exp_logit; + } + for (size_t i = 0; i < probs.size(); i++) probs[i] /= sum_exp; + return probs; +} + +void perplexity(falcon_context * ctx, const gpt_params & params) { + // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research + // Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw` + // Output: `perplexity: 13.5106 [114/114]` + // BOS tokens will be added for each chunk before eval + auto tokens = ::falcon_tokenize(ctx, params.prompt, true); + + int count = 0; + + const int n_chunk = tokens.size() / params.n_ctx; + const int n_vocab = falcon_n_vocab(ctx); + const int n_batch = params.n_batch; + + double nll = 0.0; + fprintf(stderr, "%s: calculating perplexity over %d chunks, batch_size=%d\n", __func__, n_chunk, n_batch); + + for (int i = 0; i < n_chunk; ++i) { + const int start = i * params.n_ctx; + const int end = start + params.n_ctx; + + const int num_batches = (params.n_ctx + n_batch - 1) / n_batch; + + std::vector logits; + + const auto t_start = std::chrono::high_resolution_clock::now(); + + for (int j = 0; j < num_batches; ++j) { + const int batch_start = start + j * n_batch; + const int batch_size = std::min(end - batch_start, n_batch); + + // save original token and restore it after eval + const auto token_org = tokens[batch_start]; + + // add BOS token for the first batch of each chunk + if (j == 0) { + // tokens[batch_start] = falcon_token_bos(); + } + + if (falcon_eval(ctx, tokens.data() + batch_start, batch_size, j * n_batch, params.n_threads, params.debug_timings)) { + fprintf(stderr, "%s : failed to eval\n", __func__); + return; + } + + // restore the original token in case it was set to BOS + tokens[batch_start] = token_org; + + const auto batch_logits = falcon_get_logits(ctx); + logits.insert(logits.end(), batch_logits, batch_logits + batch_size * n_vocab); + } + + const auto t_end = std::chrono::high_resolution_clock::now(); + + if (i == 0) { + const float t_total = std::chrono::duration(t_end - t_start).count(); + fprintf(stderr, "%s: %.2f seconds per pass - ETA ", __func__, t_total); + int total_seconds = (int)(t_total * n_chunk); + if (total_seconds >= 60*60) { + fprintf(stderr, "%d hours ", total_seconds / (60*60)); + total_seconds = total_seconds % (60*60); + } + fprintf(stderr, "%d minutes\n", total_seconds / 60); + } + + // We get the logits for all the tokens in the context window (params.n_ctx) + // from llama_eval above. Now, based on https://huggingface.co/docs/transformers/perplexity, + // calculate the perplexity over the last half of the window (so the model always has + // some context to predict the token). + // + // We rely on the fact that attention in the forward pass only looks at previous + // tokens here, so the logits returned for each token are an accurate representation + // of what the model would have predicted at that point. + // + // Example, we have a context window of 512, we will compute perplexity for each of the + // last 256 tokens. Then, we split the input up into context window size chunks to + // process the entire prompt. + for (int j = std::min(512, params.n_ctx / 2); j < params.n_ctx - 1; ++j) { + // Calculate probability of next token, given the previous ones. + const std::vector tok_logits( + logits.begin() + (j + 0) * n_vocab, + logits.begin() + (j + 1) * n_vocab); + + const float prob = softmax(tok_logits)[tokens[start + j + 1]]; + + nll += -std::log(prob); + ++count; + } + // perplexity is e^(average negative log-likelihood) + printf("[%d]%.4lf,", i + 1, std::exp(nll / count)); + fflush(stdout); + } + printf("\n"); +} + +int main(int argc, char ** argv) { + gpt_params params; + + params.n_batch = 512; + if (gpt_params_parse(argc, argv, params) == false) { + return 1; + } + + params.perplexity = true; + params.n_batch = std::min(params.n_batch, params.n_ctx); + + if (params.n_ctx > 2048) { + fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);" + "expect poor results\n", __func__, params.n_ctx); + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + if (params.seed < 0) { + params.seed = time(NULL); + } + + fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + + std::mt19937 rng(params.seed); + if (params.random_prompt) { + params.prompt = gpt_random_prompt(rng); + } + + llama_init_backend(); + + falcon_context * ctx; + + // load the model and apply lora adapter, if any + ctx = falcon_init_from_gpt_params(params); + if (ctx == NULL) { + fprintf(stderr, "%s: error: unable to load model\n", __func__); + return 1; + } + + // print system information + { + fprintf(stderr, "\n"); + fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", + params.n_threads, std::thread::hardware_concurrency(), falcon_print_system_info()); + } + + perplexity(ctx, params); + + falcon_print_timings(ctx); + llama_free(ctx); + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..32f3104efe8ec881aa27f4f46a0274b2895efe3d --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TARGET falcon_quantize) +add_executable(${TARGET} quantize.cpp) +target_link_libraries(${TARGET} PRIVATE libfalcon ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f349e913e3d10dc84ca1e045048e92ede8a3d5d6 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/README.md @@ -0,0 +1,3 @@ +# quantize + +TODO diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/quantize.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/quantize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3af7f3ffe82f22dbdf25d7f0739d3c223cf2e80b --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/falcon_quantize/quantize.cpp @@ -0,0 +1,261 @@ +#include "build-info.h" + +#include "libfalcon.h" + +#include +#include +#include +#include + +struct quant_option { + std::string name; + llama_ftype ftype; + std::string desc; +}; + +static const std::vector QUANT_OPTIONS = { + { + "Q4_0", + LLAMA_FTYPE_MOSTLY_Q4_0, + " 3.50G, +0.2499 ppl @ 7B - small, very high quality loss - legacy, prefer using Q3_K_M", + }, + { + "Q4_1", + LLAMA_FTYPE_MOSTLY_Q4_1, + " 3.90G, +0.1846 ppl @ 7B - small, substantial quality loss - legacy, prefer using Q3_K_L", + }, + { + "Q5_0", + LLAMA_FTYPE_MOSTLY_Q5_0, + " 4.30G, +0.0796 ppl @ 7B - medium, balanced quality - legacy, prefer using Q4_K_M", + }, + { + "Q5_1", + LLAMA_FTYPE_MOSTLY_Q5_1, + " 4.70G, +0.0415 ppl @ 7B - medium, low quality loss - legacy, prefer using Q5_K_M", + }, +#ifdef GGML_USE_K_QUANTS + { + "Q2_K", + LLAMA_FTYPE_MOSTLY_Q2_K, + " 2.67G, +0.8698 ppl @ 7B - smallest, extreme quality loss - not recommended", + }, + { + "Q3_K", + LLAMA_FTYPE_MOSTLY_Q3_K_M, + "alias for Q3_K_M" + }, + { + "Q3_K_S", + LLAMA_FTYPE_MOSTLY_Q3_K_S, + " 2.75G, +0.5505 ppl @ 7B - very small, very high quality loss", + }, + { + "Q3_K_M", + LLAMA_FTYPE_MOSTLY_Q3_K_M, + " 3.06G, +0.2437 ppl @ 7B - very small, very high quality loss", + }, + { + "Q3_K_L", + LLAMA_FTYPE_MOSTLY_Q3_K_L, + " 3.35G, +0.1803 ppl @ 7B - small, substantial quality loss", + }, + { + "Q4_K", + LLAMA_FTYPE_MOSTLY_Q4_K_M, + "alias for Q4_K_M", + }, + { + "Q4_K_S", + LLAMA_FTYPE_MOSTLY_Q4_K_S, + " 3.56G, +0.1149 ppl @ 7B - small, significant quality loss", + }, + { + "Q4_K_M", + LLAMA_FTYPE_MOSTLY_Q4_K_M, + " 3.80G, +0.0535 ppl @ 7B - medium, balanced quality - *recommended*", + }, + { + "Q5_K", + LLAMA_FTYPE_MOSTLY_Q5_K_M, + "alias for Q5_K_M", + }, + { + "Q5_K_S", + LLAMA_FTYPE_MOSTLY_Q5_K_S, + " 4.33G, +0.0353 ppl @ 7B - large, low quality loss - *recommended*", + }, + { + "Q5_K_M", + LLAMA_FTYPE_MOSTLY_Q5_K_M, + " 4.45G, +0.0142 ppl @ 7B - large, very low quality loss - *recommended*", + }, + { + "Q6_K", + LLAMA_FTYPE_MOSTLY_Q6_K, + " 5.15G, +0.0044 ppl @ 7B - very large, extremely low quality loss", + }, +#endif + { + "Q8_0", + LLAMA_FTYPE_MOSTLY_Q8_0, + " 6.70G, +0.0004 ppl @ 7B - very large, extremely low quality loss - not recommended", + }, + { + "F16", + LLAMA_FTYPE_MOSTLY_F16, + "13.00G @ 7B - extremely large, virtually no quality loss - not recommended", + }, + { + "F32", + LLAMA_FTYPE_ALL_F32, + "26.00G @ 7B - absolutely huge, lossless - not recommended", + }, +}; + + +bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) { + std::string ftype_str; + + for (auto ch : ftype_str_in) { + ftype_str.push_back(std::toupper(ch)); + } + for (auto & it : QUANT_OPTIONS) { + if (it.name == ftype_str) { + ftype = it.ftype; + ftype_str_out = it.name; + return true; + } + } + try { + int ftype_int = std::stoi(ftype_str); + for (auto & it : QUANT_OPTIONS) { + if (it.ftype == ftype_int) { + ftype = it.ftype; + ftype_str_out = it.name; + return true; + } + } + } + catch (...) { + // stoi failed + } + return false; +} + +// usage: +// ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads] +// +void usage(const char * executable) { + fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.bin [model-quant.bin] type [nthreads]\n\n", executable); + fprintf(stderr, " --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n"); + fprintf(stderr, " --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n"); + fprintf(stderr, "\nAllowed quantization types:\n"); + for (auto & it : QUANT_OPTIONS) { + printf(" %2d or %-6s : %s\n", it.ftype, it.name.c_str(), it.desc.c_str()); + } + exit(1); +} + +int main(int argc, char ** argv) { + if (argc < 3) { + usage(argv[0]); + } + + llama_model_quantize_params params = llama_model_quantize_default_params(); + + int arg_idx = 1; + + for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) { + if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) { + params.quantize_output_tensor = false; + } else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) { + params.allow_requantize = true; + } else { + usage(argv[0]); + } + } + + if (argc - arg_idx < 3) { + usage(argv[0]); + } + + llama_init_backend(); + + // parse command line arguments + const std::string fname_inp = argv[arg_idx]; + arg_idx++; + std::string fname_out; + + std::string ftype_str; + if (try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) { + std::string fpath; + const size_t pos = fname_inp.find_last_of('/'); + if (pos != std::string::npos) { + fpath = fname_inp.substr(0, pos + 1); + } + // export as [inp path]/ggml-model-[ftype].bin + fname_out = fpath + "ggml-model-" + ftype_str + ".bin"; + arg_idx++; + } + else { + fname_out = argv[arg_idx]; + arg_idx++; + + if (argc <= arg_idx) { + fprintf(stderr, "%s: missing ftype\n", __func__); + return 1; + } + if (!try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) { + fprintf(stderr, "%s: invalid ftype '%s'\n", __func__, argv[3]); + return 1; + } + arg_idx++; + } + + // parse nthreads + if (argc > arg_idx) { + try { + params.nthread = std::stoi(argv[arg_idx]); + } + catch (const std::exception & e) { + fprintf(stderr, "%s: invalid nthread '%s' (%s)\n", __func__, argv[arg_idx], e.what()); + return 1; + } + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + fprintf(stderr, "%s: quantizing '%s' to '%s' as %s", __func__, fname_inp.c_str(), fname_out.c_str(), ftype_str.c_str()); + if (params.nthread > 0) { + fprintf(stderr, " using %d threads", params.nthread); + } + fprintf(stderr, "\n"); + + const int64_t t_main_start_us = llama_time_us(); + + int64_t t_quantize_us = 0; + + // load the model + { + const int64_t t_start_us = llama_time_us(); + + if (falcon_model_quantize(fname_inp.c_str(), fname_out.c_str(), ¶ms)) { + fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str()); + return 1; + } + + t_quantize_us = llama_time_us() - t_start_us; + } + + // report timing + { + const int64_t t_main_end_us = llama_time_us(); + + printf("\n"); + printf("%s: quantize time = %8.2f ms\n", __func__, t_quantize_us/1000.0); + printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0); + } + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/gpt4all.sh b/ctransformers/models/submodules/ggllm.cpp/examples/gpt4all.sh new file mode 100644 index 0000000000000000000000000000000000000000..5fd739e55c554a3e7f855c147789d873fd7aff24 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/gpt4all.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# +# Temporary script - will be removed in the future +# + +cd `dirname $0` +cd .. + +./main --color --instruct --threads 4 \ + --model ./models/gpt4all-7B/gpt4all-lora-quantized.bin \ + --file ./prompts/alpaca.txt \ + --batch_size 8 --ctx_size 2048 -n -1 \ + --repeat_last_n 64 --repeat_penalty 1.3 \ + --n_predict 128 --temp 0.1 --top_k 40 --top_p 0.95 diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4c42e3cdbf5264805981e9ebe88a437e6cdd4aec --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/README.md @@ -0,0 +1,21 @@ +# llama.cpp/example/jeopardy + +This is pretty much just a straight port of aigoopy/llm-jeopardy/ with an added graph viewer. + +The jeopardy test can be used to compare the fact knowledge of different models and compare them to eachother. This is in contrast to some other tests, which test logical deduction, creativity, writing skills, etc. + + +Step 1: Open jeopardy.sh and modify the following: +``` +MODEL=(path to your model) +MODEL_NAME=(name of your model) +prefix=(basically, if you use vicuna it's Human: , if you use something else it might be User: , etc) +opts=(add -instruct here if needed for your model, or anything else you want to test out) +``` +Step 2: Run `jeopardy.sh` from the llama.cpp folder + +Step 3: Repeat steps 1 and 2 until you have all the results you need. + +Step 4: Run `graph.py`, and follow the instructions. At the end, it will generate your final graph. + +Note: The Human bar is based off of the full, original 100 sample questions. If you modify the question count or questions, it will not be valid. diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/graph.py b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/graph.py new file mode 100644 index 0000000000000000000000000000000000000000..1b6c54bff73d13096140d1de2cd46cdaefc071d5 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/graph.py @@ -0,0 +1,57 @@ +import matplotlib.pyplot as plt +import os +import csv + +labels = [] +numbers = [] +numEntries = 1 + +rows = [] + + +def bar_chart(numbers, labels, pos): + plt.bar(pos, numbers, color='blue') + plt.xticks(ticks=pos, labels=labels) + plt.title("Jeopardy Results by Model") + plt.xlabel("Model") + plt.ylabel("Questions Correct") + plt.show() + + +def calculatecorrect(): + directory = os.fsencode("./examples/jeopardy/results/") + csv_reader = csv.reader(open("./examples/jeopardy/qasheet.csv", 'rt'), delimiter=',') + for row in csv_reader: + global rows + rows.append(row) + for listing in os.listdir(directory): + filename = os.fsdecode(listing) + if filename.endswith(".txt"): + file = open("./examples/jeopardy/results/" + filename, "rt") + global labels + global numEntries + global numbers + labels.append(filename[:-4]) + numEntries += 1 + i = 1 + totalcorrect = 0 + for line in file.readlines(): + if line.strip() != "------": + print(line) + else: + print("Correct answer: " + rows[i][2] + "\n") + i += 1 + print("Did the AI get the question right? (y/n)") + if input() == "y": + totalcorrect += 1 + numbers.append(totalcorrect) + + +if __name__ == '__main__': + calculatecorrect() + pos = list(range(numEntries)) + labels.append("Human") + numbers.append(48.11) + bar_chart(numbers, labels, pos) + print(labels) + print(numbers) diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/jeopardy.sh b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/jeopardy.sh new file mode 100644 index 0000000000000000000000000000000000000000..9bdbc755c13a7894fc0bc48af1d41acabbe6b254 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/jeopardy.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -e + +MODEL=./models/ggml-vicuna-13b-1.1-q4_0.bin +MODEL_NAME=Vicuna + +# exec options +prefix="Human: " # Ex. Vicuna uses "Human: " +opts="--temp 0 -n 80" # additional flags +nl=' +' +introduction="You will be playing a game of Jeopardy. Simply answer the question in the correct format (Ex. What is Paris, or Who is George Washington)." + +# file options +question_file=./examples/jeopardy/questions.txt +touch ./examples/jeopardy/results/$MODEL_NAME.txt +output_file=./examples/jeopardy/results/$MODEL_NAME.txt + +counter=1 + +echo 'Running' +while IFS= read -r question +do + exe_cmd="./main -p "\"$prefix$introduction$nl$prefix$question\"" "$opts" -m ""\"$MODEL\""" >> ""\"$output_file\"" + echo $counter + echo "Current Question: $question" + eval "$exe_cmd" + echo -e "\n------" >> $output_file + counter=$((counter+1)) +done < "$question_file" diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/qasheet.csv b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/qasheet.csv new file mode 100644 index 0000000000000000000000000000000000000000..35b08418956ab5c54d0afa4c3f2b896931272d36 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/qasheet.csv @@ -0,0 +1,103 @@ +Index,Original Category,Original Correct Question,Model Prompt +1,The Oscars,Who is John Williams?,Which actor Born in 1932 was the son of a percussionist in the CBS radio orchestra has been nominated for 53 Oscars? +2,English Literature,What is Paradise Lost?,"What work in English Literature says: 'The mind is its own place, & in itself can make a heaven of hell, a hell of heaven. What matter where, if I be still the same'?" +3,Writers’ Lesser-Known Works,Who is Niccolò Machiavelli?,"Known for more philosophical works, he wrote the play 'La Mandragola', in which Florentines are rewarded for immoral actions?" +4,Exploration,What is Easter Island (Rapa Nui)?,"James Cook's account of a 1774 visit where records an object 'near 27 feet long, and upwards of 8 feet over the breast or shoulders'?" +5,The Bill of Rights,What is the Eighth Amendment?,England's 'Bloody Assizes' & a 1685 life sentence for perjury were 2 main origins of which amendment to the U.S. Constitution? +6,Nobel Peace Prize Winners,Who are Nelson Mandela & Desmond Tutu?,"Which nobel peace price winners each lived at times on Vilakazi St. in Soweto , so it claims to be the world's only street home to 2 Nobel Peace Prize winners?" +7,Famous Names,Who is Walt Disney?,"In 1966, the year of who's death did he share plans for an experimental prototype community in Florida?" +8,Geography,What is Colombia?,"Of the 13 nations through which the Equator passes, what is the only one whose coastline borders the Caribbean Sea?" +9,Fashion History,What are rhinestones?,"Which decorative items in fashion history get their name from their origin in the port city of Strasbourg, on the border of France & Germany?" +10,Movies of the ’80s,What is Driving Miss Daisy?,What 1980's movie is based on an off-Broadway play with just 3 characters and won the Best Picture Oscar & the actors in all 3 roles were nominated? +11,Novelists,Who is John Grisham?,"A 2012 book review for which novelist noted subjects that 'sparked his ire': capital punishment, big tobacco & 'the plight of the unjustly convicted'?" +12,20th Century Eponyms,What is the Maginot Line?,"A 1940 headline about what 20th Century Eponym included 'failure', 'liability when it came to offense' & 'stout hearts no match for tanks'?" +13,City History,What is Stockholm?,"Over 700 years after its traditional 1252 founding date, what port city became associated with a psychological response?" +14,Brand Names,What is Jacuzzi?,"The success of what brand has its roots with a hydrotherapy pump its cofounder created for his son, who had arthritis?" +15,American Authors,Who is Washington Irving?,"In a periodical in 1807, what American Author called New York City 'Gotham, Gotham! Most enlightened of cities'?" +16,Symbols,What is “less than”?,What symbol is a rotated V in math and a feeling of some marginalized or underrepresented people in society? +17,Movie Theme Songs,Who is James Bond?,"Monty Norman, the composer of what character's theme, said the staccato riff conveyed sexiness, mystery & ruthlessness?" +18,American Novelists,Who is Joseph Heller?,"What American Novelist served with an airman named Yohannan in World War II & despite what readers might think, he said he enjoyed his service?" +19,Medieval Places,"What is Canterbury, England? (Canterbury Cathedral)","In what Medieval place did one of the participants in an 1170 event say, 'Let us away, knights; he will rise no more'?" +20,Countries of Africa,What is Morocco?,"At one time a province of the Roman Empire, what African country kingdom is known to Arabic scholars as Al-Maghrib Al-Aqsa, 'the far west'?" +21,Statehood,What is Wyoming?,Congress relented in 1890 after what prospective state said it would wait 100 years rather than come in without the women? +22,1980s Movies,What is Raiders of the Lost Ark?,"A writer & producer of what movie said he wanted it to be like a Western or James Bond film, 'only it takes place in the 30s'?" +23,Art Exhibitions,Who is Rembrandt?,In 1898 what's been called the first blockbuster art show was devoted to which artist & put on for Queen Wilhelmina's coronation? +24,Countries of the World,What is Mongolia?,"Part of the largest contiguous land empire during the 1200s & 1300s, today what is the world's second-largest landlocked country?" +25,Literature,What is “Howl”?,A 2006 book was titled 'The Poem That Changed America:' What 'Fifty Years Later'? +26,Invasions,Who is William of Orange?,"Backed by 14,000 troops, who invaded England to restore, in his words, its 'religion, laws, and liberties'?" +27,Landmarks,What is the Eiffel Tower?,"After its completion in the late 19th c., what was landmark was called 'a truly tragic street lamp' & a 'high & skinny pyramid of iron ladders'?" +28,Geographic Name’s the Same,What is Dover?,"The busiest passenger port in the U.K., what shares its name with a capital of one of the original 13 states?" +29,Names in the Bookstore,Who is Peter Mark Roget?,"This man made lists, perhaps to cope with depression; a set of lists he published in 1852 made whose name synonymous with a type of book?" +30,U.S. History,Who is Dr. Samuel Mudd?,"An 1869 presidential pardon was granted to which man, due in part to a plea by the Medical Society of Harford County, Maryland?" +31,American Literature,What is The Things They Carried?,"Letters, pocket knives, C rations & steel helmets are among the tangible items referred to in the title of what American literature modern war classic?" +32,Nonfiction,What is The Communist Manifesto,"What nonfiction book has the line, 'The discovery of America…opened up fresh ground for the rising bourgeoisie'?" +33, a new version was passed 81 years later,Laws in U.S. History,What is the Civil Rights Act?,,,,,,,,,,,,,,,,,,0, 2/3 +34,Names of Myth,Who is Helen of Troy?,"Whose brothers, Castor & Pollux, saved her after Theseus stole her away as a kid; a larger force would seek her later in life?" +35,African Countries,What is Sudan?,"Once Africa's largest country in area, what African Country dropped to third in 2011 when a portion of it declared independence?" +36,The Ancient World,What is Alexandria?,"The ancient writer Galen said books on ships arriving to what city's port were seized, originals kept & copies returned?" +37,Famous Names,Who is Andy Warhol?,"For a special 1970s cookbook, who provided one simple recipe–a can of Campbell's tomato soup & 2 cans of milk?" +38,People & Places,What is Guam?,"Thought to descend from people of Southeast Asia, the Chamorro make up what U.S. territory’s largest ethnic group?" +39,Current World Leaders,What is the Philippines?,"In office from 2022, the president of what country has taken so many foreign trips a play on his name is 'Ferdinand Magellan Jr.'?" +40,Writers & The South,Who is Tennessee Williams?,In 1939 which writer lived on Toulouse Street in the French Quarter & chose the professional name that bonded him to the South? +41,National Parks,What is Yellowstone?,"What National Park is named for a river indigenous people called Mi tse a-da-zi, translated by French-speaking trappers as 'Pierre Jaune'?" +42,Sports,Who are the Harlem Globetrotters?,"In 2010 who introduced the 4-point shot, 35 feet from the basket?" +43,The U.S. Military,What is “Top Gun”?,Losses over Asia in the 1960s led to the establishment of the program known as what at a San Diego naval base in 1969? +44,Art & Science,What is Halley’s Comet?,"A craft that visited what was named for Giotto, based on the story that 680 years earlier, the painter depicted it as the Star of Bethlehem?" +45,Words From World War I,What is “tank”?,"In World War I, 'Cistern' & 'reservoir' were suggested names for what secret invention, but the British preferred this less clumsy monosyllable?" +46,European History,What is Holy Roman Emperor?,"Until 1806, some German nobles included among their honors the title of 'Elector' for their role in selecting this personage?" +47,Theater History,Who is Peter Pan?,"In 1904, wearing a harness, actress Nina Boucicault became the first to play what character onstage?" +48,European Cities,What is Aachen?,"Alphabetically the first German city in encyclopedias, what was also the first one taken by the Allies in World War II?" +49,Word Origins,What is mantra?,This Sanskrit word referring to a spoken word or phrase comes from a word for 'to think'? +50,Inventions,What is barbed wire?,1917's 'Elements of Trench Warfare' said what Old West invention was 'difficult to destroy' & 'difficult to get through'? +51,World War II,What is Schindler’s list?,"Mimi Reinhard, who never learned to type using more than 2 fingers, produced what in World War II with 1,100 names, including hers?" +52, their offspring was the source of this mythical object,Mythology,What is the Golden Fleece? +53,Literature,What is Pride and Prejudice?,"Published in 2011, P.D. James' final novel, 'Death Comes to Pemberley', was a sequel to what novel from 200 years earlier?" +54, only these 2 west of the Mississippi River border each other,U.S. State Names,What are Oregon & Nevada? +55,Word Origins,What is passion?,"Originally relating to a story of suffering, what word now more commonly refers to strong emotion of any kind?" +56,World Cinema,What is La Vie en Rose?,"The 2007 biopic called 'La Môme' in France, meaning 'The Kid', was released in the U.S. under what other French title?" +57,History,What is Santa Maria?,"Returning home in 1493, Columbus stopped in the Azores at an island with what name, also something he'd lost off the Haiti coast?" +58,Landmarks,What is a kremlin?,Pskov & Nizhny Novgorod are 2 of the cities that have a fortress called what? +59,Foreign-Born Authors,Who is Vladimir Nabokov?,In the 1950s the New York Times said what author 'is writing about all lust' & his lecherous narrator 'is all of us'? +60,Astronomy & Geography,What is Capricorn?,"At the winter solstice, the sun is in Sagittarius; it once appeared in what constellation, giving a geographic feature its name?" +61,Television,What is Law & Order?,"Mike Post combined the sound of a slamming jail door, an anvil & 100 men stomping on a floor for what television series that debuted in 1990?" +62,British Landmarks,What is the Tower of London?,"Like Sir Thomas More, 3 16th century English queens are buried at what British location?" +63,Early American History,What are witches?,"In 1692 Increase Mather wrote, 'It were better that ten suspected' of these who 'escape, than that one innocent person … be condemned'?" +64,Geography Mnemonics,What are Arkansas and Louisiana?,"The Geography Mnemonic Mimal, sometimes said to be the silhouette of a chef or elf, stands for Minnesota, Iowa, Missouri, and what other 2 states?" +65,Business Milestones,What is the Ford Model T?,"What was first sold in 1908, at a price equivalent to about $27,000 today?" +66,In The Bookstore,Who is Tom Clancy?,The name of what author dead since 2013 now appears on books written by a former U.S. marshal & a former Apache helicopter pilot? +67,Historic Art,What is the Bayeux Tapestry?,The artwork once known in France as 'la tapisserie de la Reine Mathilde' is better known as what? +68,Pop Stars,Who is Madonna?,In 2022 which pop star became the first woman to have a Billboard Top 10 album in 5 decades starting with the 1980s? +69,Classic Tale Characters,Who is Scheherazade?,"In one 19th century translation, what female classic tale character 'perceived the dawn of day and ceased' speaking nearly 1,000 times?" +70,USA,What is Jack Daniel’s?,"Ironically, though what company founded in the 1860s is Moore County, Tennessee's largest employer, Moore is a dry county?" +71,Historic People,Who was William Bligh?,"After a 1789 event, who wrote, 'My first determination was to seek a supply of…water at Tofoa, & afterwards to sail for Tongataboo'?" +72,The Movies,What is The Godfather?,Laurence Olivier & Ernest Borgnine were considered for the lead role & Sergio Leone to direct for what film that turned 50 in 2022? +73,Continental Geography,What is Colombia?,"Until a 1903 secession, what country's contiguous territory spanned 2 continents?" +74,Foreign-Born Authors,Who is Isabel Allende?,"Early in her career which foreign-born author translated romance novels into Spanish, often changing the dialogue to make the heroines smarter?" +75,Historic Crimes,What is the Mona Lisa?,"Saying it was stolen by Napoleon, self-styled Italian patriot Vincenzo Peruggia took what in 1911?" +76,U.S. Bodies of Water,What is Lake Mead?,"Continuing a downward trend, in July 2022 what US body of water was at 27% capacity, its lowest level since 1937 when it was first being filled?" +77,Gods & Goddesses,Who is Aurora (or Eos)?,"Each morning which goddess began her ride in her chariot across the sky ahead of her brother Sol, or Helios?" +78,America At War,What is the Battle of New Orleans?,"Until the Civil War, the Jan. 8 date of what American battle of dubious military importance but big morale value was a national holiday?" +79,Children’s Books,What is The Velveteen Rabbit?,"Which children's book title character is told 'By the time you are real, most of your hair has been loved off your eyes drop out & you get shabby'?" +80,TV Finales,What is Grace and Frankie?,"In a TV reunion over 40 years in the making, Dolly Parton appeared as an angel named Agnes in the final episode of what comedy in 2022?" +81,American Poems,Who is Evangeline?,"In an 1847 American poem what character sees her town of Grand-Pré burned, but finally reunites with her beau for a kiss before his death?" +82,Famous Names,Who is Banksy?,"In 2001 who published a book called 'Banging Your Head Against a Brick Wall'; in 2002, 'Existencilism'?" +83,Children’s Lit,What is Charlotte’s Web?,The title object of what childrens book 'never looked more beautiful each strand held dozens of bright drops of early morning dew'? +84,Classic Songs,What is “Here Comes Santa Claus”?,The shouts of excited children at a 1946 holiday parade are said to have inspired what perennial classic song favorite? +85,Brand Names,What are Milk Duds?,"Unable to make what candies perfectly round, the confectioner embraced this flawed name for the product?" +86,Countries of the World,What is Italy?,"What country is home to 58 UNESCO World Heritage Sites, more than any other country; the sites include a volcano & a lagoon?" +87,Action Movies,What is Die Hard?,"What action movie's last line is 'If this is their idea of Christmas, I gotta be here for New Years'?" +88,Presidential Facts,Who is Woodrow Wilson?,Only 3 presidents have married while in office— John Tyler was the first & which one was the last? +89,19th Century Americans,Who is Frederick Douglass?,"Demonstrating the dignity & humanity of Black Americans, who sat for 160 known photographs, the most of any American in the 19th century?" +90,Latin Phrases,What is “quid pro quo”?,"Originally, which Latin 3-word phrase referred to when a doctor or apothecary substituted one medicine for another?" +91,1970s Movies,What is Monty Python and the Holy Grail?,The 1975 premiere of what movie comedy advertised free coconuts for the first thousand in the audience? +92,Name’s The Same,What is Manhattan?,"A cocktail, an island & a WWII venture originally called 'Development of Substitute Materials' all bear what name?" +93,U.S. Presidents,Who is Calvin Coolidge?,"Which US President was sworn in twice as President within 2 years, first by his father & then later by a former U.S. President?" +94,Plays,What is The Tempest?,A 1609 story in which an exiled king of Bulgaria creates a sea palace with his magic may have inspired the plot of what play? +95,Landmarks,What is the Berlin Wall?,"In 2009, during a 20th anniversary celebration, what landmark was called 'an edifice of fear. On Nov. 9, it became a place of joy'?" +96,World Capitals,"What is Vienna, Austria?","Among what world capital's nicknames are the 'City of Classical Music' &, possibly in honor of a famous resident from 1860 to 1938, the 'City of Dreams'?" +97,Language & Its Meanings,What is a night owl?,"Now meaning someone with nocturnal habits, what catches a sleeping dove in Shakespeare's 'Lucrece'?" +98,Flags of Our Hemisphere,What is Brazil?,"The stars on what country's flag represent states, 26 of them; unlike the USA's, its 'federal district' gets its own 27th star?" +99,Names in U.S. History,Who is Oliver Brown?,What father was the only man among the 13 plaintiffs in a US class-action case filed in 1951? +100,Children’s Authors,"Who is Sarah? (from Sarah, Plain and Tall)","Reversing the story of what heroine she created, childrens author Patricia Maclachlan was born on the prairie but spent much of her life in New England?" +,,, +TOTALS,,, diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/questions.txt b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/questions.txt new file mode 100644 index 0000000000000000000000000000000000000000..eea78a057126ce5da310e31157914b540c534449 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/jeopardy/questions.txt @@ -0,0 +1,100 @@ +Which man born in 1932 was the son of a percussionist in the CBS radio orchestra has been nominated for 53 Oscars? +What work in English Literature says: 'The mind is its own place, & in itself can make a heaven of hell, a hell of heaven. What matter where, if I be still the same'? +Known for more philosophical works, he wrote the play 'La Mandragola', in which Florentines are rewarded for immoral actions? +James Cook's account of a 1774 visit where records an object 'near 27 feet long, and upwards of 8 feet over the breast or shoulders'? +England's 'Bloody Assizes' & a 1685 life sentence for perjury were 2 main origins of which amendment to the U.S. Constitution? +Which nobel peace price winners each lived at times on Vilakazi St. in Soweto , so it claims to be the world's only street home to 2 Nobel Peace Prize winners? +In 1966, the year of who's death did he share plans for an experimental prototype community in Florida? +Of the 13 nations through which the Equator passes, what is the only one whose coastline borders the Caribbean Sea? +Which decorative items in fashion history get their name from their origin in the port city of Strasbourg, on the border of France & Germany? +What 1980's movie is based on an off-Broadway play with just 3 characters and won the Best Picture Oscar & the actors in all 3 roles were nominated? +A 2012 book review for which novelist noted subjects that 'sparked his ire': capital punishment, big tobacco & 'the plight of the unjustly convicted'? +A 1940 headline about what 20th Century Eponym included 'failure', 'liability when it came to offense' & 'stout hearts no match for tanks'? +Over 700 years after its traditional 1252 founding date, what port city became associated with a psychological response? +The success of what brand has its roots with a hydrotherapy pump its cofounder created for his son, who had arthritis? +In a periodical in 1807, what American Author called New York City 'Gotham, Gotham! Most enlightened of cities'? +What symbol is a rotated V in math and a feeling of some marginalized or underrepresented people in society? +Monty Norman, the composer of what character's theme, said the staccato riff conveyed sexiness, mystery & ruthlessness? +What American Novelist served with an airman named Yohannan in World War II & despite what readers might think, he said he enjoyed his service? +In what Medieval place did one of the participants in an 1170 event say, 'Let us away, knights; he will rise no more'? +At one time a province of the Roman Empire, what African country kingdom is known to Arabic scholars as Al-Maghrib Al-Aqsa, 'the far west'? +Congress relented in 1890 after what prospective state said it would wait 100 years rather than come in without the women? +A writer & producer of what movie said he wanted it to be like a Western or James Bond film, 'only it takes place in the 30s'? +In 1898 what's been called the first blockbuster art show was devoted to which artist & put on for Queen Wilhelmina's coronation? +Part of the largest contiguous land empire during the 1200s & 1300s, today what is the world's second-largest landlocked country? +A 2006 book was titled 'The Poem That Changed America:' What 'Fifty Years Later'? +Backed by 14,000 troops, who invaded England to restore, in his words, its 'religion, laws, and liberties'? +After its completion in the late 19th c., what was landmark was called 'a truly tragic street lamp' & a 'high & skinny pyramid of iron ladders'? +The busiest passenger port in the U.K., what shares its name with a capital of one of the original 13 states? +This man made lists, perhaps to cope with depression; a set of lists he published in 1852 made whose name synonymous with a type of book? +An 1869 presidential pardon was granted to which man, due in part to a plea by the Medical Society of Harford County, Maryland? +Letters, pocket knives, C rations & steel helmets are among the tangible items referred to in the title of what American literature modern war classic? +What nonfiction book has the line, 'The discovery of America…opened up fresh ground for the rising bourgeoisie'? +A radical Republican championed what 1875 act but the Supreme Court struck it down in 1883; a new version was passed 81 years later? +Whose brothers, Castor & Pollux, saved her after Theseus stole her away as a kid; a larger force would seek her later in life? +Once Africa's largest country in area, what African Country dropped to third in 2011 when a portion of it declared independence? +The ancient writer Galen said books on ships arriving to what city's port were seized, originals kept & copies returned? +For a special 1970s cookbook, who provided one simple recipe–a can of Campbell's tomato soup & 2 cans of milk? +Thought to descend from people of Southeast Asia, the Chamorro make up what U.S. territory’s largest ethnic group? +In office from 2022, the president of what country has taken so many foreign trips a play on his name is 'Ferdinand Magellan Jr.'? +In 1939 which writer lived on Toulouse Street in the French Quarter & chose the professional name that bonded him to the South? +What National Park is named for a river indigenous people called Mi tse a-da-zi, translated by French-speaking trappers as 'Pierre Jaune'? +In 2010 who introduced the 4-point shot, 35 feet from the basket? +Losses over Asia in the 1960s led to the establishment of the program known as what at a San Diego naval base in 1969? +A craft that visited what was named for Giotto, based on the story that 680 years earlier, the painter depicted it as the Star of Bethlehem? +In World War I, 'Cistern' & 'reservoir' were suggested names for what secret invention, but the British preferred this less clumsy monosyllable? +Until 1806, some German nobles included among their honors the title of 'Elector' for their role in selecting this personage? +In 1904, wearing a harness, actress Nina Boucicault became the first to play what character onstage? +Alphabetically the first German city in encyclopedias, what was also the first one taken by the Allies in World War II? +This Sanskrit word referring to a spoken word or phrase comes from a word for 'to think'? +1917's 'Elements of Trench Warfare' said what Old West invention was 'difficult to destroy' & 'difficult to get through'? +Mimi Reinhard, who never learned to type using more than 2 fingers, produced what in World War II with 1,100 names, including hers? +Poseidon carried off the maiden Theophane & turned her into a ewe; their offspring was the source of what mythical object? +Published in 2011, P.D. James' final novel, 'Death Comes to Pemberley', was a sequel to what novel from 200 years earlier? +5 U.S. states have 6-letter names; only which 2 west of the Mississippi River border each other? +Originally relating to a story of suffering, what word now more commonly refers to strong emotion of any kind? +The 2007 biopic called 'La Môme' in France, meaning 'The Kid', was released in the U.S. under what other French title? +Returning home in 1493, Columbus stopped in the Azores at an island with what name, also something he'd lost off the Haiti coast? +Pskov & Nizhny Novgorod are 2 of the cities that have a fortress called what? +In the 1950s the New York Times said what author 'is writing about all lust' & his lecherous narrator 'is all of us'? +At the winter solstice, the sun is in Sagittarius; it once appeared in what constellation, giving a geographic feature its name? +Mike Post combined the sound of a slamming jail door, an anvil & 100 men stomping on a floor for what television series that debuted in 1990? +Like Sir Thomas More, 3 16th century English queens are buried at what British location? +In 1692 Increase Mather wrote, 'It were better that ten suspected' of these who 'escape, than that one innocent person be condemned'? +The Geography Mnemonic Mimal, sometimes said to be the silhouette of a chef or elf, stands for Minnesota, Iowa, Missouri, and what other 2 states? +What was first sold in 1908, at a price equivalent to about $27,000 today? +The name of what author dead since 2013 now appears on books written by a former U.S. marshal & a former Apache helicopter pilot? +The artwork once known in France as 'la tapisserie de la Reine Mathilde' is better known as what? +In 2022 which pop star became the first woman to have a Billboard Top 10 album in 5 decades starting with the 1980s? +In one 19th century translation, what female classic tale character 'perceived the dawn of day and ceased' speaking nearly 1,000 times? +Ironically, though what company founded in the 1860s is Moore County, Tennessee's largest employer, Moore is a dry county? +After a 1789 event, who wrote, 'My first determination was to seek a supply of…water at Tofoa, & afterwards to sail for Tongataboo'? +Laurence Olivier & Ernest Borgnine were considered for the lead role & Sergio Leone to direct for what film that turned 50 in 2022? +Until a 1903 secession, what country's contiguous territory spanned 2 continents? +Early in her career which foreign-born author translated romance novels into Spanish, often changing the dialogue to make the heroines smarter? +Saying it was stolen by Napoleon, self-styled Italian patriot Vincenzo Peruggia took what in 1911? +Continuing a downward trend, in July 2022 what US body of water was at 27% capacity, its lowest level since 1937 when it was first being filled? +Each morning which goddess began her ride in her chariot across the sky ahead of her brother Sol, or Helios? +Until the Civil War, the Jan. 8 date of what American battle of dubious military importance but big morale value was a national holiday? +Which children's book title character is told 'By the time you are real, most of your hair has been loved off your eyes drop out & you get shabby'? +In a TV reunion over 40 years in the making, Dolly Parton appeared as an angel named Agnes in the final episode of what comedy in 2022? +In an 1847 American poem what character sees her town of Grand-Pré burned, but finally reunites with her beau for a kiss before his death? +In 2001 who published a book called 'Banging Your Head Against a Brick Wall'; in 2002, 'Existencilism'? +The title object of what childrens book 'never looked more beautiful each strand held dozens of bright drops of early morning dew'? +The shouts of excited children at a 1946 holiday parade are said to have inspired what perennial classic song favorite? +Unable to make what candies perfectly round, the confectioner embraced this flawed name for the product? +What country is home to 58 UNESCO World Heritage Sites, more than any other country; the sites include a volcano & a lagoon? +What action movie's last line is 'If this is their idea of Christmas, I gotta be here for New Years'? +Only 3 presidents have married while in office— John Tyler was the first & which one was the last? +Demonstrating the dignity & humanity of Black Americans, who sat for 160 known photographs, the most of any American in the 19th century? +Originally, which Latin 3-word phrase referred to when a doctor or apothecary substituted one medicine for another? +The 1975 premiere of what movie comedy advertised free coconuts for the first thousand in the audience? +A cocktail, an island & a WWII venture originally called 'Development of Substitute Materials' all bear what name? +Which US President was sworn in twice as President within 2 years, first by his father & then later by a former U.S. President? +A 1609 story in which an exiled king of Bulgaria creates a sea palace with his magic may have inspired the plot of what play? +In 2009, during a 20th anniversary celebration, what landmark was called 'an edifice of fear. On Nov. 9, it became a place of joy'? +Among what world capital's nicknames are the 'City of Classical Music' &, possibly in honor of a famous resident from 1860 to 1938, the 'City of Dreams'? +Now meaning someone with nocturnal habits, what catches a sleeping dove in Shakespeare's 'Lucrece'? +The stars on what country's flag represent states, 26 of them; unlike the USA's, its 'federal district' gets its own 27th star? +What father was the only man among the 13 plaintiffs in a US class-action case filed in 1951? +Reversing the story of what heroine she created, childrens author Patricia Maclachlan was born on the prairie but spent much of her life in New England? diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/main/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/main/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c364242fbadb425f37cf2e18e17816484d8c80f7 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/main/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TARGET main) +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/main/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/main/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b6d3212feb4de55631af93cc143f43814f3d8a4d --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/main/README.md @@ -0,0 +1,293 @@ +# llama.cpp/example/main + +This example program allows you to use various LLaMA language models in an easy and efficient way. It is specifically designed to work with the [llama.cpp](https://github.com/ggerganov/llama.cpp) project, which provides a plain C/C++ implementation with optional 4-bit quantization support for faster, lower memory inference, and is optimized for desktop CPUs. This program can be used to perform various inference tasks with LLaMA models, including generating text based on user-provided prompts and chat-like interactions with reverse prompts. + +## Table of Contents + +1. [Quick Start](#quick-start) +2. [Common Options](#common-options) +3. [Input Prompts](#input-prompts) +4. [Interaction](#interaction) +5. [Context Management](#context-management) +6. [Generation Flags](#generation-flags) +7. [Performance Tuning and Memory Options](#performance-tuning-and-memory-options) +8. [Additional Options](#additional-options) + +## Quick Start + +To get started right away, run the following command, making sure to use the correct path for the model you have: + +#### Unix-based systems (Linux, macOS, etc.): + +```bash +./main -m models/7B/ggml-model.bin --prompt "Once upon a time" +``` + +#### Windows: + +```powershell +main.exe -m models\7B\ggml-model.bin --prompt "Once upon a time" +``` + +For an interactive experience, try this command: + +#### Unix-based systems (Linux, macOS, etc.): + +```bash +./main -m models/7B/ggml-model.bin -n -1 --color -r "User:" --in-prefix " " \ +'User: Hi +AI: Hello. I am an AI chatbot. Would you like to talk? +User: Sure! +AI: What would you like to talk about? +User:' +``` + +#### Windows: + +```powershell +main.exe -m models\7B\ggml-model.bin -n -1 --color -r "User:" --in-prefix " " -e --prompt "User: Hi\nAI: Hello. I am an AI chatbot. Would you like to talk?\nUser: Sure!\nAI: What would you like to talk about?\nUser:" +``` + +The following command generates "infinite" text from a starting prompt (you can use `Ctrl-C` to stop it): + +#### Unix-based systems (Linux, macOS, etc.): + +```bash +./main -m models/7B/ggml-model.bin --ignore-eos -n -1 --random-prompt +``` + +#### Windows: + +```powershell +main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt +``` + +## Common Options + +In this section, we cover the most commonly used options for running the `main` program with the LLaMA models: + +- `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`). +- `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses. +- `-ins, --instruct`: Run the program in instruction mode, which is particularly useful when working with Alpaca models. +- `-n N, --n-predict N`: Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text. +- `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. + +## Input Prompts + +The `main` program provides several ways to interact with the LLaMA models using input prompts: + +- `--prompt PROMPT`: Provide a prompt directly as a command-line option. +- `--file FNAME`: Provide a file containing a prompt or multiple prompts. +- `--interactive-first`: Run the program in interactive mode and wait for input right away. (More on this below.) +- `--random-prompt`: Start with a randomized prompt. + +## Interaction + +The `main` program offers a seamless way to interact with LLaMA models, allowing users to engage in real-time conversations or provide instructions for specific tasks. The interactive mode can be triggered using various options, including `--interactive`, `--interactive-first`, and `--instruct`. + +In interactive mode, users can participate in text generation by injecting their input during the process. Users can press `Ctrl+C` at any time to interject and type their input, followed by pressing `Return` to submit it to the LLaMA model. To submit additional lines without finalizing input, users can end the current line with a backslash (`\`) and continue typing. + +### Interaction Options + +- `-i, --interactive`: Run the program in interactive mode, allowing users to engage in real-time conversations or provide specific instructions to the model. +- `--interactive-first`: Run the program in interactive mode and immediately wait for user input before starting the text generation. +- `-ins, --instruct`: Run the program in instruction mode, which is specifically designed to work with Alpaca models that excel in completing tasks based on user instructions. +- `--color`: Enable colorized output to differentiate visually distinguishing between prompts, user input, and generated text. + +By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the LLaMA models, tailoring the text generation process to your specific needs. + +### Reverse Prompts + +Reverse prompts are a powerful way to create a chat-like experience with a LLaMA model by pausing the text generation when specific text strings are encountered: + +- `-r PROMPT, --reverse-prompt PROMPT`: Specify one or multiple reverse prompts to pause text generation and switch to interactive mode. For example, `-r "User:"` can be used to jump back into the conversation whenever it's the user's turn to speak. This helps create a more interactive and conversational experience. However, the reverse prompt doesn't work when it ends with a space. + +To overcome this limitation, you can use the `--in-prefix` flag to add a space or any other characters after the reverse prompt. + +### In-Prefix + +The `--in-prefix` flag is used to add a prefix to your input, primarily, this is used to insert a space after the reverse prompt. Here's an example of how to use the `--in-prefix` flag in conjunction with the `--reverse-prompt` flag: + +```sh +./main -r "User:" --in-prefix " " +``` + +### In-Suffix + +The `--in-suffix` flag is used to add a suffix after your input. This is useful for adding an "Assistant:" prompt after the user's input. It's added after the new-line character (`\n`) that's automatically added to the end of the user's input. Here's an example of how to use the `--in-suffix` flag in conjunction with the `--reverse-prompt` flag: + +```sh +./main -r "User:" --in-prefix " " --in-suffix "Assistant:" +``` + +### Instruction Mode + +Instruction mode is particularly useful when working with Alpaca models, which are designed to follow user instructions for specific tasks: + +- `-ins, --instruct`: Enable instruction mode to leverage the capabilities of Alpaca models in completing tasks based on user-provided instructions. + +Technical detail: the user's input is internally prefixed with the reverse prompt (or `### Instruction:` as the default), and followed by `### Response:` (except if you just press Return without any input, to keep generating a longer response). + +By understanding and utilizing these interaction options, you can create engaging and dynamic experiences with the LLaMA models, tailoring the text generation process to your specific needs. + +## Context Management + +During text generation, LLaMA models have a limited context size, which means they can only consider a certain number of tokens from the input and generated text. When the context fills up, the model resets internally, potentially losing some information from the beginning of the conversation or instructions. Context management options help maintain continuity and coherence in these situations. + +### Context Size + +The `--ctx-size` option allows you to set the size of the prompt context used by the LLaMA models during text generation. A larger context size helps the model to better comprehend and generate responses for longer input or conversations. + +- `-c N, --ctx-size N`: Set the size of the prompt context (default: 512). The LLaMA models were built with a context of 2048, which will yield the best results on longer input/inference. However, increasing the context size beyond 2048 may lead to unpredictable results. + +### Keep Prompt + +The `--keep` option allows users to retain the original prompt when the model runs out of context, ensuring a connection to the initial instruction or conversation topic is maintained. + +- `--keep N`: Specify the number of tokens from the initial prompt to retain when the model resets its internal context. By default, this value is set to 0 (meaning no tokens are kept). Use `-1` to retain all tokens from the initial prompt. + +By utilizing context management options like `--ctx-size` and `--keep`, you can maintain a more coherent and consistent interaction with the LLaMA models, ensuring that the generated text remains relevant to the original prompt or conversation. + +## Generation Flags + +The following options allow you to control the text generation process and fine-tune the diversity, creativity, and quality of the generated text according to your needs. By adjusting these options and experimenting with different combinations of values, you can find the best settings for your specific use case. + +### Number of Tokens to Predict + +- `-n N, --n-predict N`: Set the number of tokens to predict when generating text (default: 128, -1 = infinity). + +The `--n-predict` option controls the number of tokens the model generates in response to the input prompt. By adjusting this value, you can influence the length of the generated text. A higher value will result in longer text, while a lower value will produce shorter text. A value of -1 will cause text to be generated without limit. + +It is important to note that the generated text may be shorter than the specified number of tokens if an End-of-Sequence (EOS) token or a reverse prompt is encountered. In interactive mode text generation will pause and control will be returned to the user. In non-interactive mode, the program will end. In both cases, the text generation may stop before reaching the specified `n-predict` value. If you want the model to keep going without ever producing End-of-Sequence on its own, you can use the `--ignore-eos` parameter. + +### Temperature + +- `--temp N`: Adjust the randomness of the generated text (default: 0.8). + +Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. + +Example usage: `--temp 0.5` + +### Repeat Penalty + +- `--repeat-penalty N`: Control the repetition of token sequences in the generated text (default: 1.1). +- `--repeat-last-n N`: Last n tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size). +- `--no-penalize-nl`: Disable penalization for newline tokens when applying the repeat penalty. + +The `repeat-penalty` option helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. The default value is 1.1. + +The `repeat-last-n` option controls the number of tokens in the history to consider for penalizing repetition. A larger value will look further back in the generated text to prevent repetitions, while a smaller value will only consider recent tokens. A value of 0 disables the penalty, and a value of -1 sets the number of tokens considered equal to the context size (`ctx-size`). + +Use the `--no-penalize-nl` option to disable newline penalization when applying the repeat penalty. This option is particularly useful for generating chat conversations, dialogues, code, poetry, or any text where newline tokens play a significant role in structure and formatting. Disabling newline penalization helps maintain the natural flow and intended formatting in these specific use cases. + +Example usage: `--repeat-penalty 1.15 --repeat-last-n 128 --no-penalize-nl` + +### Top-K Sampling + +- `--top-k N`: Limit the next token selection to the K most probable tokens (default: 40). + +Top-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top-k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text. The default value is 40. + +Example usage: `--top-k 30` + +### Top-P Sampling + +- `--top-p N`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.9). + +Top-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top-p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. The default value is 0.9. + +Example usage: `--top-p 0.95` + +### Tail Free Sampling (TFS) + +- `--tfs N`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled). + +Tail free sampling (TFS) is a text generation technique that aims to reduce the impact of less likely tokens, which may be less relevant, less coherent, or nonsensical, on the output. The method adjusts the logits (token probabilities) by raising them to the power of the parameter z. A higher value of z (e.g., 2.0) will further suppress less likely tokens from the tail of the distribution, while a value of 1.0 disables the effect of TFS. By setting the parameter z, you can control how much the probabilities of less likely tokens are reduced. + +Example usage: `--tfs 2.0` + +### Locally Typical Sampling + +- `--typical N`: Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled). + +Locally typical sampling promotes the generation of contextually coherent and diverse text by sampling tokens that are typical or expected based on the surrounding context. By setting the parameter p between 0 and 1, you can control the balance between producing text that is locally coherent and diverse. A value closer to 1 will promote more contextually coherent tokens, while a value closer to 0 will promote more diverse tokens. A value equal to 1 disables locally typical sampling. + +Example usage: `--typical 0.9` + +### Mirostat Sampling + +- `--mirostat N`: Enable Mirostat sampling, controlling perplexity during text generation (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0). +- `--mirostat-lr N`: Set the Mirostat learning rate, parameter eta (default: 0.1). +- `--mirostat-ent N`: Set the Mirostat target entropy, parameter tau (default: 5.0). + +Mirostat is an algorithm that actively maintains the quality of generated text within a desired range during text generation. It aims to strike a balance between coherence and diversity, avoiding low-quality output caused by excessive repetition (boredom traps) or incoherence (confusion traps). + +The `--mirostat-lr` option sets the Mirostat learning rate (eta). The learning rate influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. The default value is `0.1`. + +The `--mirostat-ent` option sets the Mirostat target entropy (tau), which represents the desired perplexity value for the generated text. Adjusting the target entropy allows you to control the balance between coherence and diversity in the generated text. A lower value will result in more focused and coherent text, while a higher value will lead to more diverse and potentially less coherent text. The default value is `5.0`. + +Example usage: `--mirostat 2 --mirostat-lr 0.05 --mirostat-ent 3.0` + +### Logit Bias + +- `-l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS`: Modify the likelihood of a token appearing in the generated text completion. + +The logit bias option allows you to manually adjust the likelihood of specific tokens appearing in the generated text. By providing a token ID and a positive or negative bias value, you can increase or decrease the probability of that token being generated. + +For example, use `--logit-bias 15043+1` to increase the likelihood of the token 'Hello', or `--logit-bias 15043-1` to decrease its likelihood. Using a value of negative infinity, `--logit-bias 15043-inf` ensures that the token `Hello` is never produced. + +A more practical use case might be to prevent the generation of `\code{begin}` and `\code{end}` by setting the `\` token (29905) to negative infinity with `-l 29905-inf`. (This is due to the prevalence of LaTeX codes that show up in LLaMA model inference.) + +Example usage: `--logit-bias 29905-inf` + +### RNG Seed + +- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed). + +The RNG seed is used to initialize the random number generator that influences the text generation process. By setting a specific seed value, you can obtain consistent and reproducible results across multiple runs with the same input and settings. This can be helpful for testing, debugging, or comparing the effects of different options on the generated text to see when they diverge. If the seed is set to a value less than 0, a random seed will be used, which will result in different outputs on each run. + +## Performance Tuning and Memory Options + +These options help improve the performance and memory usage of the LLaMA models. By adjusting these settings, you can fine-tune the model's behavior to better suit your system's capabilities and achieve optimal performance for your specific use case. + +### Number of Threads + +- `-t N, --threads N`: Set the number of threads to use during computation. For optimal performance, it is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). Using the correct number of threads can greatly improve performance. + +### Mlock + +- `--mlock`: Lock the model in memory, preventing it from being swapped out when memory-mapped. This can improve performance but trades away some of the advantages of memory-mapping by requiring more RAM to run and potentially slowing down load times as the model loads into RAM. + +### No Memory Mapping + +- `--no-mmap`: Do not memory-map the model. By default, models are mapped into memory, which allows the system to load only the necessary parts of the model as needed. However, if the model is larger than your total amount of RAM or if your system is low on available memory, using mmap might increase the risk of pageouts, negatively impacting performance. Disabling mmap results in slower load times but may reduce pageouts if you're not using `--mlock`. Note that if the model is larger than the total amount of RAM, turning off mmap would prevent the model from loading at all. + +### Memory Float 32 + +- `--memory-f32`: Use 32-bit floats instead of 16-bit floats for memory key+value. This doubles the context memory requirement and cached prompt file size but does not appear to increase generation quality in a measurable way. Not recommended. + +### Batch Size + +- `-b N, --batch-size N`: Set the batch size for prompt processing (default: 512). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations. + +### Prompt Caching + +- `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs. **Note**: Restoring a cached prompt does not imply restoring the exact state of the session at the point it was saved. So even when specifying a specific seed, you are not guaranteed to get the same sequence of tokens as the original generation. + +### Quantization + +For information about 4-bit quantization, which can significantly improve performance and reduce memory usage, please refer to llama.cpp's primary [README](../../README.md#prepare-data--run). + +## Additional Options + +These options provide extra functionality and customization when running the LLaMA models: + +- `-h, --help`: Display a help message showing all available options and their default values. This is particularly useful for checking the latest options and default values, as they can change frequently, and the information in this document may become outdated. +- `--verbose-prompt`: Print the prompt before generating text. +- `--mtest`: Test the model's functionality by running a series of tests to ensure it's working properly. +- `-ngl N, --n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance. +- `-mg i, --main-gpu i`: When using multiple GPUs this option controls which GPU is used for small tensors for which the overhead of splitting the computation across all GPUs is not worthwhile. The GPU in question will use slightly more VRAM to store a scratch buffer for temporary results. By default GPU 0 is used. Requires cuBLAS. +- `-ts SPLIT, --tensor-split SPLIT`: When using multiple GPUs this option controls how large tensors should be split across all GPUs. `SPLIT` is a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order. For example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1. By default the data is split in proportion to VRAM but this may not be optimal for performance. Requires cuBLAS. +- `-lv, --low-vram`: Do not allocate a VRAM scratch buffer for holding temporary results. Reduces VRAM usage at the cost of performance, particularly prompt processing speed. Requires cuBLAS. +- `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains. +- `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation. diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/main/main.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/main/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..941312f9cc756c8d90d8b055950d2064ae7cc67e --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/main/main.cpp @@ -0,0 +1,671 @@ +// Defines sigaction on msys: +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include "common.h" +#include "llama.h" +#include "build-info.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#include +#include +#elif defined (_WIN32) +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#include +#endif + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +static console_state con_st; +static llama_context ** g_ctx; + +static bool is_interacting = false; + +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) +void sigint_handler(int signo) { + if (signo == SIGINT) { + if (!is_interacting) { + is_interacting=true; + } else { + console_cleanup(con_st); + printf("\n"); + llama_print_timings(*g_ctx); + _exit(130); + } + } +} +#endif + +int main(int argc, char ** argv) { + gpt_params params; + + if (gpt_params_parse(argc, argv, params) == false) { + return 1; + } + + // save choice to use color for later + // (note for later: this is a slightly awkward choice) + con_st.use_color = params.use_color; + con_st.multiline_input = params.multiline_input; + console_init(con_st); + atexit([]() { console_cleanup(con_st); }); + + if (params.perplexity) { + printf("\n************\n"); + printf("%s: please use the 'perplexity' tool for perplexity calculations\n", __func__); + printf("************\n\n"); + + return 0; + } + + if (params.embedding) { + printf("\n************\n"); + printf("%s: please use the 'embedding' tool for embedding calculations\n", __func__); + printf("************\n\n"); + + return 0; + } + + if (params.n_ctx > 2048) { + fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);" + "expect poor results\n", __func__, params.n_ctx); + } else if (params.n_ctx < 8) { + fprintf(stderr, "%s: warning: minimum context size is 8, using minimum size.\n", __func__); + params.n_ctx = 8; + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + if (params.seed < 0) { + params.seed = time(NULL); + } + + fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + + std::mt19937 rng(params.seed); + if (params.random_prompt) { + params.prompt = gpt_random_prompt(rng); + } + + llama_init_backend(); + + llama_context * ctx; + g_ctx = &ctx; + + // load the model and apply lora adapter, if any + ctx = llama_init_from_gpt_params(params); + if (ctx == NULL) { + fprintf(stderr, "%s: error: unable to load model\n", __func__); + return 1; + } + + // print system information + { + fprintf(stderr, "\n"); + fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", + params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info()); + } + + // determine the maximum memory usage needed to do inference for the given n_batch and n_predict parameters + // uncomment the "used_mem" line in llama.cpp to see the results + if (params.mem_test) { + { + const std::vector tmp(params.n_batch, llama_token_bos()); + llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads); + } + + { + const std::vector tmp = { 0, }; + llama_eval(ctx, tmp.data(), tmp.size(), params.n_predict - 1, params.n_threads); + } + + llama_print_timings(ctx); + llama_free(ctx); + + return 0; + } + + // export the cgraph and exit + if (params.export_cgraph) { + llama_eval_export(ctx, "llama.ggml"); + llama_free(ctx); + + return 0; + } + + std::string path_session = params.path_prompt_cache; + std::vector session_tokens; + + if (!path_session.empty()) { + fprintf(stderr, "%s: attempting to load saved session from '%s'\n", __func__, path_session.c_str()); + + // fopen to check for existing session + FILE * fp = std::fopen(path_session.c_str(), "rb"); + if (fp != NULL) { + std::fclose(fp); + + session_tokens.resize(params.n_ctx); + size_t n_token_count_out = 0; + if (!llama_load_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out)) { + fprintf(stderr, "%s: error: failed to load session file '%s'\n", __func__, path_session.c_str()); + return 1; + } + session_tokens.resize(n_token_count_out); + llama_set_rng_seed(ctx, params.seed); + + fprintf(stderr, "%s: loaded a session with prompt size of %d tokens\n", __func__, (int) session_tokens.size()); + } else { + fprintf(stderr, "%s: session file does not exist, will create\n", __func__); + } + } + + // tokenize the prompt + std::vector embd_inp; + + if (params.interactive_first || params.instruct || !params.prompt.empty() || session_tokens.empty()) { + // Add a space in front of the first character to match OG llama tokenizer behavior + params.prompt.insert(0, 1, ' '); + + embd_inp = ::llama_tokenize(ctx, params.prompt, true); + } else { + embd_inp = session_tokens; + } + + const int n_ctx = llama_n_ctx(ctx); + + if ((int) embd_inp.size() > n_ctx - 4) { + fprintf(stderr, "%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4); + return 1; + } + + // debug message about similarity of saved session, if applicable + size_t n_matching_session_tokens = 0; + if (session_tokens.size()) { + for (llama_token id : session_tokens) { + if (n_matching_session_tokens >= embd_inp.size() || id != embd_inp[n_matching_session_tokens]) { + break; + } + n_matching_session_tokens++; + } + if (params.prompt.empty() && n_matching_session_tokens == embd_inp.size()) { + fprintf(stderr, "%s: using full prompt from session file\n", __func__); + } else if (n_matching_session_tokens >= embd_inp.size()) { + fprintf(stderr, "%s: session file has exact match for prompt!\n", __func__); + } else if (n_matching_session_tokens < (embd_inp.size() / 2)) { + fprintf(stderr, "%s: warning: session file has low similarity to prompt (%zu / %zu tokens); will mostly be reevaluated\n", + __func__, n_matching_session_tokens, embd_inp.size()); + } else { + fprintf(stderr, "%s: session file matches %zu / %zu tokens of prompt\n", + __func__, n_matching_session_tokens, embd_inp.size()); + } + } + + // if we will use the cache for the full prompt without reaching the end of the cache, force + // reevaluation of the last token token to recalculate the cached logits + if (!embd_inp.empty() && n_matching_session_tokens == embd_inp.size() && + session_tokens.size() > embd_inp.size()) { + session_tokens.resize(embd_inp.size() - 1); + } + + // number of tokens to keep when resetting context + if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct) { + params.n_keep = (int)embd_inp.size(); + } + + // prefix & suffix for instruct mode + const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true); + const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false); + + // in instruct mode, we inject a prefix and a suffix to each input by the user + if (params.instruct) { + params.interactive_first = true; + params.antiprompt.push_back("### Instruction:\n\n"); + } + + // enable interactive mode if interactive start is specified + if (params.interactive_first) { + params.interactive = true; + } + + // determine newline token + auto llama_token_newline = ::llama_tokenize(ctx, "\n", false); + + if (params.verbose_prompt) { + fprintf(stderr, "\n"); + fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); + fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); + for (int i = 0; i < (int) embd_inp.size(); i++) { + fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i])); + } + if (params.n_keep > 0) { + fprintf(stderr, "%s: static prompt based on n_keep: '", __func__); + for (int i = 0; i < params.n_keep; i++) { + fprintf(stderr, "%s", llama_token_to_str(ctx, embd_inp[i])); + } + fprintf(stderr, "'\n"); + } + fprintf(stderr, "\n"); + } + + if (params.interactive) { +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + struct sigaction sigint_action; + sigint_action.sa_handler = sigint_handler; + sigemptyset (&sigint_action.sa_mask); + sigint_action.sa_flags = 0; + sigaction(SIGINT, &sigint_action, NULL); +#elif defined (_WIN32) + auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL { + return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false; + }; + SetConsoleCtrlHandler(static_cast(console_ctrl_handler), true); +#endif + + fprintf(stderr, "%s: interactive mode on.\n", __func__); + + if (params.antiprompt.size()) { + for (auto antiprompt : params.antiprompt) { + fprintf(stderr, "Reverse prompt: '%s'\n", antiprompt.c_str()); + } + } + + if (!params.input_prefix.empty()) { + fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str()); + } + + if (!params.input_suffix.empty()) { + fprintf(stderr, "Input suffix: '%s'\n", params.input_suffix.c_str()); + } + } + fprintf(stderr, "sampling: repeat_last_n = %d, repeat_penalty = %f, presence_penalty = %f, frequency_penalty = %f, top_k = %d, tfs_z = %f, top_p = %f, typical_p = %f, temp = %f, mirostat = %d, mirostat_lr = %f, mirostat_ent = %f\n", + params.repeat_last_n, params.repeat_penalty, params.presence_penalty, params.frequency_penalty, params.top_k, params.tfs_z, params.top_p, params.typical_p, params.temp, params.mirostat, params.mirostat_eta, params.mirostat_tau); + fprintf(stderr, "generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep); + fprintf(stderr, "\n\n"); + + // TODO: replace with ring-buffer + std::vector last_n_tokens(n_ctx); + std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0); + + if (params.interactive) { + const char *control_message; + if (con_st.multiline_input) { + control_message = " - To return control to LLaMa, end your input with '\\'.\n" + " - To return control without starting a new line, end your input with '/'.\n"; + } else { + control_message = " - Press Return to return control to LLaMa.\n" + " - To return control without starting a new line, end your input with '/'.\n" + " - If you want to submit another line, end your input with '\\'.\n"; + } + fprintf(stderr, "== Running in interactive mode. ==\n" +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) + " - Press Ctrl+C to interject at any time.\n" +#endif + "%s\n", control_message); + + is_interacting = params.interactive_first; + } + + bool is_antiprompt = false; + bool input_echo = true; + bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < embd_inp.size(); + + int n_past = 0; + int n_remain = params.n_predict; + int n_consumed = 0; + int n_session_consumed = 0; + + // the first thing we will do is to output the prompt, so set color accordingly + console_set_color(con_st, CONSOLE_COLOR_PROMPT); + + std::vector embd; + + // do one empty run to warm up the model + { + const std::vector tmp = { llama_token_bos(), }; + llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads); + llama_reset_timings(ctx); + } + + while ((n_remain != 0 && !is_antiprompt) || params.interactive) { + // predict + if (embd.size() > 0) { + // Note: n_ctx - 4 here is to match the logic for commandline prompt handling via + // --prompt or --file which uses the same value. + auto max_embd_size = n_ctx - 4; + // Ensure the input doesn't exceed the context size by truncating embd if necessary. + if ((int)embd.size() > max_embd_size) { + auto skipped_tokens = embd.size() - max_embd_size; + console_set_color(con_st, CONSOLE_COLOR_ERROR); + printf("<>", skipped_tokens, skipped_tokens != 1 ? "s" : ""); + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + fflush(stdout); + embd.resize(max_embd_size); + } + + // infinite text generation via context swapping + // if we run out of context: + // - take the n_keep first tokens from the original prompt (via n_past) + // - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches + if (n_past + (int) embd.size() > n_ctx) { + const int n_left = n_past - params.n_keep; + + // always keep the first token - BOS + n_past = std::max(1, params.n_keep); + + // insert n_left/2 tokens at the start of embd from last_n_tokens + embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left/2 - embd.size(), last_n_tokens.end() - embd.size()); + + // stop saving session if we run out of context + path_session.clear(); + + //printf("\n---\n"); + //printf("resetting: '"); + //for (int i = 0; i < (int) embd.size(); i++) { + // printf("%s", llama_token_to_str(ctx, embd[i])); + //} + //printf("'\n"); + //printf("\n---\n"); + } + + // try to reuse a matching prefix from the loaded session instead of re-eval (via n_past) + if (n_session_consumed < (int) session_tokens.size()) { + size_t i = 0; + for ( ; i < embd.size(); i++) { + if (embd[i] != session_tokens[n_session_consumed]) { + session_tokens.resize(n_session_consumed); + break; + } + + n_past++; + n_session_consumed++; + + if (n_session_consumed >= (int) session_tokens.size()) { + ++i; + break; + } + } + if (i > 0) { + embd.erase(embd.begin(), embd.begin() + i); + } + } + + // evaluate tokens in batches + // embd is typically prepared beforehand to fit within a batch, but not always + for (int i = 0; i < (int) embd.size(); i += params.n_batch) { + int n_eval = (int) embd.size() - i; + if (n_eval > params.n_batch) { + n_eval = params.n_batch; + } + if (llama_eval(ctx, &embd[i], n_eval, n_past, params.n_threads)) { + fprintf(stderr, "%s : failed to eval\n", __func__); + return 1; + } + n_past += n_eval; + } + + if (embd.size() > 0 && !path_session.empty()) { + session_tokens.insert(session_tokens.end(), embd.begin(), embd.end()); + n_session_consumed = session_tokens.size(); + } + } + + embd.clear(); + + if ((int) embd_inp.size() <= n_consumed && !is_interacting) { + // out of user input, sample next token + const float temp = params.temp; + const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k; + const float top_p = params.top_p; + const float tfs_z = params.tfs_z; + const float typical_p = params.typical_p; + const int32_t repeat_last_n = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n; + const float repeat_penalty = params.repeat_penalty; + const float alpha_presence = params.presence_penalty; + const float alpha_frequency = params.frequency_penalty; + const int mirostat = params.mirostat; + const float mirostat_tau = params.mirostat_tau; + const float mirostat_eta = params.mirostat_eta; + const bool penalize_nl = params.penalize_nl; + + // optionally save the session on first sample (for faster prompt loading next time) + if (!path_session.empty() && need_to_save_session && !params.prompt_cache_ro) { + need_to_save_session = false; + llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); + } + + llama_token id = 0; + + { + auto logits = llama_get_logits(ctx); + auto n_vocab = llama_n_vocab(ctx); + + // Apply params.logit_bias map + for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) { + logits[it->first] += it->second; + } + + std::vector candidates; + candidates.reserve(n_vocab); + for (llama_token token_id = 0; token_id < n_vocab; token_id++) { + candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f}); + } + + llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false }; + + // Apply penalties + float nl_logit = logits[llama_token_nl()]; + auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx); + llama_sample_repetition_penalty(ctx, &candidates_p, + last_n_tokens.data() + last_n_tokens.size() - last_n_repeat, + last_n_repeat, repeat_penalty); + llama_sample_frequency_and_presence_penalties(ctx, &candidates_p, + last_n_tokens.data() + last_n_tokens.size() - last_n_repeat, + last_n_repeat, alpha_frequency, alpha_presence); + if (!penalize_nl) { + logits[llama_token_nl()] = nl_logit; + } + + if (temp <= 0) { + // Greedy sampling + id = llama_sample_token_greedy(ctx, &candidates_p); + } else { + if (mirostat == 1) { + static float mirostat_mu = 2.0f * mirostat_tau; + const int mirostat_m = 100; + llama_sample_temperature(ctx, &candidates_p, temp); + id = llama_sample_token_mirostat(ctx, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu); + } else if (mirostat == 2) { + static float mirostat_mu = 2.0f * mirostat_tau; + llama_sample_temperature(ctx, &candidates_p, temp); + id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu); + } else { + // Temperature sampling + llama_sample_top_k(ctx, &candidates_p, top_k, 1); + llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1); + llama_sample_typical(ctx, &candidates_p, typical_p, 1); + llama_sample_top_p(ctx, &candidates_p, top_p, 1); + llama_sample_temperature(ctx, &candidates_p, temp); + id = llama_sample_token(ctx, &candidates_p); + } + } + // printf("`%d`", candidates_p.size); + + last_n_tokens.erase(last_n_tokens.begin()); + last_n_tokens.push_back(id); + } + + // replace end of text token with newline token when in interactive mode + if (id == llama_token_eos() && params.interactive && !params.instruct) { + id = llama_token_newline.front(); + if (params.antiprompt.size() != 0) { + // tokenize and inject first reverse prompt + const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false); + embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end()); + } + } + + // add it to the context + embd.push_back(id); + + // echo this to console + input_echo = true; + + // decrement remaining sampling budget + --n_remain; + } else { + // some user input remains from prompt or interaction, forward it to processing + while ((int) embd_inp.size() > n_consumed) { + embd.push_back(embd_inp[n_consumed]); + last_n_tokens.erase(last_n_tokens.begin()); + last_n_tokens.push_back(embd_inp[n_consumed]); + ++n_consumed; + if ((int) embd.size() >= params.n_batch) { + break; + } + } + } + + // display text + if (input_echo) { + for (auto id : embd) { + printf("%s", llama_token_to_str(ctx, id)); + } + fflush(stdout); + } + // reset color to default if we there is no pending user input + if (input_echo && (int)embd_inp.size() == n_consumed) { + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + } + + // if not currently processing queued inputs; + if ((int) embd_inp.size() <= n_consumed) { + + // check for reverse prompt + if (params.antiprompt.size()) { + std::string last_output; + for (auto id : last_n_tokens) { + last_output += llama_token_to_str(ctx, id); + } + + is_antiprompt = false; + // Check if each of the reverse prompts appears at the end of the output. + // If we're not running interactively, the reverse prompt might be tokenized with some following characters + // so we'll compensate for that by widening the search window a bit. + for (std::string & antiprompt : params.antiprompt) { + size_t extra_padding = params.interactive ? 0 : 2; + size_t search_start_pos = last_output.length() > static_cast(antiprompt.length() + extra_padding) + ? last_output.length() - static_cast(antiprompt.length() + extra_padding) + : 0; + + if (last_output.find(antiprompt.c_str(), search_start_pos) != std::string::npos) { + if (params.interactive) { + is_interacting = true; + console_set_color(con_st, CONSOLE_COLOR_USER_INPUT); + } + is_antiprompt = true; + fflush(stdout); + break; + } + } + } + + if (n_past > 0 && is_interacting) { + if (params.instruct) { + printf("\n> "); + } + + std::string buffer; + if (!params.input_prefix.empty()) { + buffer += params.input_prefix; + printf("%s", buffer.c_str()); + } + + std::string line; + bool another_line = true; + do { + another_line = console_readline(con_st, line); + buffer += line; + } while (another_line); + + // done taking input, reset color + console_set_color(con_st, CONSOLE_COLOR_DEFAULT); + + // Add tokens to embd only if the input buffer is non-empty + // Entering a empty line lets the user pass control back + if (buffer.length() > 1) { + // append input suffix if any + if (!params.input_suffix.empty()) { + buffer += params.input_suffix; + printf("%s", params.input_suffix.c_str()); + } + + // instruct mode: insert instruction prefix + if (params.instruct && !is_antiprompt) { + n_consumed = embd_inp.size(); + embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end()); + } + + auto line_inp = ::llama_tokenize(ctx, buffer, false); + embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); + + // instruct mode: insert response suffix + if (params.instruct) { + embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); + } + + n_remain -= line_inp.size(); + } + + input_echo = false; // do not echo this again + } + + if (n_past > 0) { + is_interacting = false; + } + } + + // end of text token + if (!embd.empty() && embd.back() == llama_token_eos()) { + if (params.instruct) { + is_interacting = true; + } else { + fprintf(stderr, " [end of text]\n"); + break; + } + } + + // In interactive mode, respect the maximum number of tokens and drop back to user input when reached. + if (params.interactive && n_remain <= 0 && params.n_predict != -1) { + n_remain = params.n_predict; + is_interacting = true; + } + } + + if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) { + fprintf(stderr, "\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str()); + llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); + } + + llama_print_timings(ctx); + llama_free(ctx); + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/metal/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/metal/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8c4284a53642edad16696a5edc4dd4e3c8b9269 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/metal/CMakeLists.txt @@ -0,0 +1,3 @@ +set(TEST_TARGET metal) +add_executable(${TEST_TARGET} metal.cpp) +target_link_libraries(${TEST_TARGET} PRIVATE ggml) diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/metal/metal.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/metal/metal.cpp new file mode 100644 index 0000000000000000000000000000000000000000..77aca94a3ec97c20e16ddec9e9d7bb5ac1e3bfed --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/metal/metal.cpp @@ -0,0 +1,102 @@ +// Evaluate a statically exported ggml computation graph with Metal +// +// - First, export a LLaMA graph: +// +// $ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export +// +// - Run this tool to evaluate the exported graph: +// +// $ ./bin/metal llama.ggml +// +// The purpose of this tool is mostly for debugging and demonstration purposes. +// The main limitation of exporting computation graphs is that their sizes are static which often +// can be a problem for real-world applications. +// + +#include "ggml.h" +#include "ggml-metal.h" + +#include +#include +#include + +int main(int argc, char ** argv) { + ggml_time_init(); + + if (argc != 2) { + fprintf(stderr, "Usage: %s llama.ggml\n", argv[0]); + return -1; + } + + const char * fname_cgraph = argv[1]; + + // load the compute graph + struct ggml_context * ctx_data = NULL; + struct ggml_context * ctx_eval = NULL; + + struct ggml_cgraph gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval); + gf.n_threads = 1; + + // this allocates all Metal resources and memory buffers + auto * ctx_metal = ggml_metal_init(); + + ggml_metal_add_buffer(ctx_metal, "data", ggml_get_mem_buffer(ctx_data), ggml_get_mem_size(ctx_data)); + ggml_metal_add_buffer(ctx_metal, "eval", ggml_get_mem_buffer(ctx_eval), ggml_get_mem_size(ctx_eval)); + + // main + { + struct ggml_tensor * input = ggml_graph_get_tensor(&gf, "embd"); + *(int32_t *) input->data = 1; // BOS + + ggml_metal_set_tensor(ctx_metal, input); + + // warmup + ggml_metal_graph_compute(ctx_metal, &gf); + + const int n_iter = 16; + + const int64_t t0 = ggml_time_us(); + + // the actual inference happens here + for (int i = 0; i < n_iter; ++i) { + ggml_metal_graph_compute(ctx_metal, &gf); + } + + const int64_t t1 = ggml_time_us(); + + printf("time: %.2f ms, %.2f ms/tok\n", (t1 - t0) / 1000.0, (t1 - t0) / 1000.0 / n_iter); + } + + // debug output + { + struct ggml_tensor * logits = gf.nodes[gf.n_nodes - 1]; + ggml_metal_get_tensor(ctx_metal, logits); + + float * ptr = (float *) ggml_get_data(logits); + + printf("logits: "); + for (int i = 0; i < 10; i++) { + printf("%8.4f ", ptr[i]); + } + printf("\n"); + int imax = 0; + double sum = 0.0; + double vmax = -1e9; + for (int i = 0; i < 32000; i++) { + sum += (double) ptr[i]; + if (ptr[i] > vmax) { + vmax = ptr[i]; + imax = i; + } + } + printf("sum: %f, imax = %d, vmax = %f\n", sum, imax, vmax); + } + + ggml_metal_free(ctx_metal); + + ggml_free(ctx_data); + ggml_free(ctx_eval); + + return 0; +} + diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..61b17b828dd1bc9efc0b4aee2d15457f229671d7 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TARGET perplexity) +add_executable(${TARGET} perplexity.cpp) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eacfb17c67fb2e8a476d9bc58b84a34c9942b24c --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/README.md @@ -0,0 +1,3 @@ +# perplexity + +TODO diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/perplexity.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/perplexity.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ae8cfe0afc0b7099e97324a08d8b19bc8c1df0db --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/perplexity/perplexity.cpp @@ -0,0 +1,174 @@ +#include "common.h" +#include "llama.h" +#include "build-info.h" + +#include +#include + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +std::vector softmax(const std::vector& logits) { + std::vector probs(logits.size()); + float max_logit = logits[0]; + for (float v : logits) max_logit = std::max(max_logit, v); + double sum_exp = 0.0; + for (size_t i = 0; i < logits.size(); i++) { + // Subtract the maximum logit value from the current logit value for numerical stability + const float logit = logits[i] - max_logit; + const float exp_logit = expf(logit); + sum_exp += exp_logit; + probs[i] = exp_logit; + } + for (size_t i = 0; i < probs.size(); i++) probs[i] /= sum_exp; + return probs; +} + +void perplexity(llama_context * ctx, const gpt_params & params) { + // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research + // Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw` + // Output: `perplexity: 13.5106 [114/114]` + // BOS tokens will be added for each chunk before eval + auto tokens = ::llama_tokenize(ctx, params.prompt, true); + + int count = 0; + + const int n_chunk = tokens.size() / params.n_ctx; + const int n_vocab = llama_n_vocab(ctx); + const int n_batch = params.n_batch; + + double nll = 0.0; + fprintf(stderr, "%s: calculating perplexity over %d chunks, batch_size=%d\n", __func__, n_chunk, n_batch); + + for (int i = 0; i < n_chunk; ++i) { + const int start = i * params.n_ctx; + const int end = start + params.n_ctx; + + const int num_batches = (params.n_ctx + n_batch - 1) / n_batch; + + std::vector logits; + + const auto t_start = std::chrono::high_resolution_clock::now(); + + for (int j = 0; j < num_batches; ++j) { + const int batch_start = start + j * n_batch; + const int batch_size = std::min(end - batch_start, n_batch); + + // save original token and restore it after eval + const auto token_org = tokens[batch_start]; + + // add BOS token for the first batch of each chunk + if (j == 0) { + tokens[batch_start] = llama_token_bos(); + } + + if (llama_eval(ctx, tokens.data() + batch_start, batch_size, j * n_batch, params.n_threads)) { + fprintf(stderr, "%s : failed to eval\n", __func__); + return; + } + + // restore the original token in case it was set to BOS + tokens[batch_start] = token_org; + + const auto batch_logits = llama_get_logits(ctx); + logits.insert(logits.end(), batch_logits, batch_logits + batch_size * n_vocab); + } + + const auto t_end = std::chrono::high_resolution_clock::now(); + + if (i == 0) { + const float t_total = std::chrono::duration(t_end - t_start).count(); + fprintf(stderr, "%s: %.2f seconds per pass - ETA ", __func__, t_total); + int total_seconds = (int)(t_total * n_chunk); + if (total_seconds >= 60*60) { + fprintf(stderr, "%d hours ", total_seconds / (60*60)); + total_seconds = total_seconds % (60*60); + } + fprintf(stderr, "%d minutes\n", total_seconds / 60); + } + + // We get the logits for all the tokens in the context window (params.n_ctx) + // from llama_eval above. Now, based on https://huggingface.co/docs/transformers/perplexity, + // calculate the perplexity over the last half of the window (so the model always has + // some context to predict the token). + // + // We rely on the fact that attention in the forward pass only looks at previous + // tokens here, so the logits returned for each token are an accurate representation + // of what the model would have predicted at that point. + // + // Example, we have a context window of 512, we will compute perplexity for each of the + // last 256 tokens. Then, we split the input up into context window size chunks to + // process the entire prompt. + for (int j = std::min(512, params.n_ctx / 2); j < params.n_ctx - 1; ++j) { + // Calculate probability of next token, given the previous ones. + const std::vector tok_logits( + logits.begin() + (j + 0) * n_vocab, + logits.begin() + (j + 1) * n_vocab); + + const float prob = softmax(tok_logits)[tokens[start + j + 1]]; + + nll += -std::log(prob); + ++count; + } + // perplexity is e^(average negative log-likelihood) + printf("[%d]%.4lf,", i + 1, std::exp(nll / count)); + fflush(stdout); + } + printf("\n"); +} + +int main(int argc, char ** argv) { + gpt_params params; + + params.n_batch = 512; + if (gpt_params_parse(argc, argv, params) == false) { + return 1; + } + + params.perplexity = true; + params.n_batch = std::min(params.n_batch, params.n_ctx); + + if (params.n_ctx > 2048) { + fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);" + "expect poor results\n", __func__, params.n_ctx); + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + if (params.seed < 0) { + params.seed = time(NULL); + } + + fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); + + std::mt19937 rng(params.seed); + if (params.random_prompt) { + params.prompt = gpt_random_prompt(rng); + } + + llama_init_backend(); + + llama_context * ctx; + + // load the model and apply lora adapter, if any + ctx = llama_init_from_gpt_params(params); + if (ctx == NULL) { + fprintf(stderr, "%s: error: unable to load model\n", __func__); + return 1; + } + + // print system information + { + fprintf(stderr, "\n"); + fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", + params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info()); + } + + perplexity(ctx, params); + + llama_print_timings(ctx); + llama_free(ctx); + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/quantize-stats/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/quantize-stats/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7bebc11a124b05b8675184f65e096b6c18e37cbf --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/quantize-stats/CMakeLists.txt @@ -0,0 +1,4 @@ +set(TARGET quantize-stats) +add_executable(${TARGET} quantize-stats.cpp) +target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/quantize-stats/quantize-stats.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/quantize-stats/quantize-stats.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6b8018ee2843225c87efce08e1e38d6ed56a8e37 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/quantize-stats/quantize-stats.cpp @@ -0,0 +1,427 @@ +#include "ggml.h" +#include "build-info.h" + +#define LLAMA_API_INTERNAL +#include "llama.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +struct quantize_stats_params { + std::string model = "models/7B/ggml-model-f16.bin"; + bool verbose = false; + bool per_layer_stats = false; + bool print_histogram = false; + bool reference = false; + std::vector include_layers; + std::vector exclude_layers; + std::vector include_types; +}; + +const size_t HISTOGRAM_BUCKETS = 150; +const double HISTOGRAM_RANGE = 0.03; + +struct error_stats { + size_t num_samples; + double total_error; + double max_error; + uint64_t error_histogram[HISTOGRAM_BUCKETS]; +}; + + +void quantize_stats_print_usage(int /*argc*/, char ** argv) { + quantize_stats_params params; + fprintf(stderr, "usage: %s [options]\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "options:\n"); + fprintf(stderr, " -h, --help show this help message and exit\n"); + fprintf(stderr, " -m FNAME, --model FNAME\n"); + fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); + fprintf(stderr, " -r, --reference\n"); + fprintf(stderr, " use reference implementation (default: false)\n"); + fprintf(stderr, " -v, --verbose\n"); + fprintf(stderr, " verbose output (default: false)\n"); + fprintf(stderr, " -p, --per-layer-stats\n"); + fprintf(stderr, " print stats per layer (default: false)\n"); + fprintf(stderr, " --histogram\n"); + fprintf(stderr, " print error histogram (default: false)\n"); + fprintf(stderr, " -l LAYER, --include-layer LAYER\n"); + fprintf(stderr, " only test layers matching pattern\n"); + fprintf(stderr, " -L LAYER, --exclude-layer LAYER\n"); + fprintf(stderr, " exclude layers matching pattern\n"); + fprintf(stderr, " -t TYPE, --type TYPE\n"); + fprintf(stderr, " only test given type (q4_0, q4_1)\n"); + fprintf(stderr, "\n"); +} + +// Check if a layer is included/excluded by command line +bool layer_included(const quantize_stats_params params, const std::string & layer) { + for (const auto& excluded : params.exclude_layers) { + if (std::regex_search(layer, std::regex(excluded))) { + return false; + } + } + for (const auto& included : params.include_layers) { + if (std::regex_search(layer, std::regex(included))) { + return true; + } + } + return params.include_layers.empty(); +} + +// Update error statistics given vectors with the before/after result of quantization +void update_error_stats(int64_t nelements, const float * input, const float * output, error_stats & stats) { + for (int64_t i = 0; i < nelements; i++) { + double diff = input[i] - output[i]; + stats.total_error += diff * diff; + stats.max_error = fmax(fabs(diff), stats.max_error); + stats.error_histogram[std::max(std::min((size_t) floor(fabs(diff) / HISTOGRAM_RANGE * HISTOGRAM_BUCKETS), HISTOGRAM_BUCKETS-1), (size_t) 0)]++; + } + stats.num_samples += nelements; +} + +void combine_error_stats(error_stats & into, const error_stats & from) { + into.num_samples += from.num_samples; + into.total_error += from.total_error; + if (from.max_error > into.max_error) into.max_error = from.max_error; + for (size_t i=0; i= sum*quantile) { + return (i+1) * HISTOGRAM_RANGE / HISTOGRAM_BUCKETS; + } + } + return INFINITY; +} + +void print_error_stats(const std::string & name, const error_stats & stats, bool print_histogram) { + double rmse = sqrt(stats.total_error / (double) stats.num_samples); + double median = find_quantile(stats, .5); + double pct95 = find_quantile(stats, .95); + printf("%-50s: rmse %.8f, maxerr %.8f, 95pct<%.4f, median<%.4f\n", name.c_str(), rmse, stats.max_error, pct95, median); + if (print_histogram) { + printf("Error distribution:\n"); + for (size_t i = 0; i < HISTOGRAM_BUCKETS; i++) { + double lower = i * HISTOGRAM_RANGE / HISTOGRAM_BUCKETS; + double upper = (i+1) * HISTOGRAM_RANGE / HISTOGRAM_BUCKETS; + if (i == HISTOGRAM_BUCKETS -1) upper = INFINITY; + printf("[%3.4f, %3.4f): %11" PRIu64 "\n", lower, upper, stats.error_histogram[i]); + } + } +} + +// copied from ggml.h - verify that we can access this as a flat array +static bool tensor_is_contiguous(const struct ggml_tensor * tensor) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return + tensor->nb[0] == ggml_type_size(tensor->type) && + tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) && + tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && + tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; +} + +void test_roundtrip_on_chunk( + const ggml_tensor * layer, + int64_t offset, + int64_t chunk_size, + const quantize_fns_t & qfns, + bool use_reference, + float * input_scratch, + char * quantized_scratch, + float * output_scratch, + error_stats & stats) { + + if (layer->type == GGML_TYPE_F16) { + for (int i = 0; i < chunk_size; i++) { + input_scratch[i] = ggml_get_f32_1d(layer, i + offset); + } + } else { + input_scratch = ggml_get_data_f32(layer) + offset; + } + + if (use_reference) { + qfns.quantize_row_q_reference(input_scratch, quantized_scratch, chunk_size); + } else { + qfns.quantize_row_q(input_scratch, quantized_scratch, chunk_size); + } + qfns.dequantize_row_q(quantized_scratch, output_scratch, chunk_size); + + update_error_stats(chunk_size, input_scratch, output_scratch, stats); +} + + +// Run quantization function for a single layer and update error stats +void test_roundtrip_on_layer( + std::string & name, + bool print_layer_stats, + const quantize_fns_t & qfns, + bool use_reference, + const ggml_tensor * layer, + std::vector & input_scratch, + std::vector & quantized_scratch, + std::vector & output_scratch, + error_stats & total_error, + int max_thread = 0) { + + assert(tensor_is_contiguous(layer)); + error_stats layer_error {}; + uint64_t nelements = ggml_nelements(layer); + + float* input_scratch_ptr = nullptr; + if (layer->type == GGML_TYPE_F16) { + if (input_scratch.size() < nelements) input_scratch.resize(nelements); + input_scratch_ptr = input_scratch.data(); + } + if (quantized_scratch.size() < 4*nelements) quantized_scratch.resize(4*nelements); + if (output_scratch.size() < nelements) output_scratch.resize(nelements); + + if (max_thread < 1) max_thread = std::thread::hardware_concurrency(); + int chunk_size = 32*512; + int num_chunks = (nelements + chunk_size - 1)/chunk_size; + + if (num_chunks < 2 || max_thread < 2) { + test_roundtrip_on_chunk(layer, 0, nelements, qfns, use_reference, input_scratch_ptr, quantized_scratch.data(), + output_scratch.data(), print_layer_stats ? layer_error : total_error); + } else { + auto & stats = print_layer_stats ? layer_error : total_error; + std::mutex mutex; + uint64_t counter = 0; + auto compute = [&mutex, &counter, &stats, &qfns, nelements, layer, use_reference, input_scratch_ptr, + &quantized_scratch, &output_scratch, chunk_size] () { + error_stats local_stats {}; + while (true) { + std::unique_lock lock(mutex); + uint64_t offset = counter; counter += chunk_size; + if (offset >= nelements) { + combine_error_stats(stats, local_stats); + break; + } + lock.unlock(); + uint64_t chunk = offset + chunk_size < nelements ? chunk_size : nelements - offset; + test_roundtrip_on_chunk(layer, offset, chunk, qfns, use_reference, input_scratch_ptr + offset, + quantized_scratch.data() + 4*offset, output_scratch.data() + offset, local_stats); + } + }; + int nthread = std::min(num_chunks, max_thread); + std::vector workers(nthread-1); + for (auto& w : workers) w = std::thread(compute); + compute(); + for (auto& w : workers) w.join(); + } + + if (print_layer_stats) { + print_error_stats(name, layer_error, false); + combine_error_stats(total_error, layer_error); + } +} + +int main(int argc, char ** argv) { + ggml_time_init(); + + quantize_stats_params params; + + // read command line + + int max_thread = 0; + bool invalid_param = false; + std::string arg; + for (int i = 1; i < argc; i++) { + arg = argv[i]; + + if (arg == "-h" || arg == "--help") { + quantize_stats_print_usage(argc, argv); + exit(0); + } else if (arg == "-r" || arg == "--reference") { + params.reference = true; + } else if (arg == "-v") { + params.verbose = true; + } else if (arg == "-p" || arg == "--per-layer-stats") { + params.per_layer_stats = true; + } else if (arg == "--histogram") { + params.print_histogram = true; + } else if (arg == "-m" || arg == "--model") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.model = argv[i]; + } else if (arg == "-l" || arg == "--include-layer") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.include_layers.push_back(argv[i]); + } else if (arg == "-L" || arg == "--exclude-layer") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.exclude_layers.push_back(argv[i]); + } else if (arg == "-t" || arg == "--type") { + if (++i >= argc) { + invalid_param = true; + break; + } + int j; + for (j = 0; j < GGML_TYPE_COUNT; ++j) { + const auto * name = ggml_type_name((ggml_type) j); + if (name && strcmp(argv[i], name) == 0) break; + } + if (j < GGML_TYPE_COUNT) { + params.include_types.push_back((ggml_type) j); + } else { + fprintf(stderr, "error: %s not in list of types\n", argv[i]); + invalid_param = true; + } + } else if (arg == "-n" || arg == "--num-threads") { + if (++i >= argc) { + invalid_param = true; + break; + } + max_thread = atoi(argv[i]); + } else { + fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); + quantize_stats_print_usage(argc, argv); + return 1; + } + } + if (invalid_param) { + fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str()); + quantize_stats_print_usage(argc, argv); + return 1; + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + // load the model + fprintf(stderr, "Loading model\n"); + + const int64_t t_main_start_us = ggml_time_us(); + llama_context * ctx; + + { + auto lparams = llama_context_default_params(); + + lparams.n_ctx = 256; + lparams.seed = 1; + lparams.f16_kv = false; + lparams.use_mlock = false; + + ctx = llama_init_from_file(params.model.c_str(), lparams); + + if (ctx == NULL) { + fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str()); + return 1; + } + } + + const auto &tensors = llama_internal_get_tensor_map(ctx); + + // check layer tensors + int included_layers = 0; + int64_t max_nelements = 0; + bool is_f16 = false; + for (const auto& kv_tensor : tensors) { + if (!layer_included(params, kv_tensor.first)) { + continue; + } + if (params.verbose) { + printf("%s: type %s, size %" PRId64 "\n", kv_tensor.first.c_str(), ggml_type_name(kv_tensor.second->type), ggml_nelements(kv_tensor.second)); + } + if (kv_tensor.second->type == GGML_TYPE_F16) { + is_f16 = true; + } else if (kv_tensor.second->type != GGML_TYPE_F32) { + fprintf(stderr, "%s: error: Quantization should be tested with a float model, " + "this model contains already quantized layers (%s is type %d)\n", __func__, kv_tensor.first.c_str(), kv_tensor.second->type); + llama_free(ctx); + return 1; + } + included_layers++; + max_nelements = std::max(max_nelements, ggml_nelements(kv_tensor.second)); + } + + if (is_f16) { + printf("note: source model is f16\n"); + } + printf("testing %d layers with max size %" PRId64 "\n", included_layers, max_nelements); + // allocate scratch space + std::vector input_scratch; + std::vector quantized_scratch; + std::vector output_scratch; + + // loop throught quantization types + for (int i = 0; i < GGML_TYPE_COUNT; i++) { + const ggml_type type = (ggml_type) i; + if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) { + continue; + } + quantize_fns_t qfns = ggml_internal_get_quantize_fn(i); + if (qfns.quantize_row_q && qfns.dequantize_row_q) { + if (params.verbose) { + printf("testing %s ...\n", ggml_type_name(type)); + } + + error_stats global_stats {}; + + for (const auto& kv_tensor : tensors) { + if (!layer_included(params, kv_tensor.first)) { + continue; + } + if (params.verbose) { + printf(" %s ...\n", kv_tensor.first.c_str()); + } + std::string layer_name { ggml_type_name(type) }; + layer_name += "::" + kv_tensor.first; + test_roundtrip_on_layer( + layer_name, + params.per_layer_stats, + qfns, + params.reference, + kv_tensor.second, + input_scratch, + quantized_scratch, + output_scratch, + global_stats, + max_thread + ); + } + + print_error_stats(ggml_type_name(type), global_stats, params.print_histogram); + } + } + + + llama_free(ctx); + // report timing + { + const int64_t t_main_end_us = ggml_time_us(); + + printf("\n"); + printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0); + } + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/quantize/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/quantize/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..475fc8be885a640c093e02ccf92eae031a9584ba --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/quantize/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TARGET quantize) +add_executable(${TARGET} quantize.cpp) +target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/quantize/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/quantize/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f349e913e3d10dc84ca1e045048e92ede8a3d5d6 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/quantize/README.md @@ -0,0 +1,3 @@ +# quantize + +TODO diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/quantize/quantize.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/quantize/quantize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4e8e6f5239c051c8ae16ce8e28f1a00a0b8b0fb9 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/quantize/quantize.cpp @@ -0,0 +1,261 @@ +#include "build-info.h" + +#include "llama.h" + +#include +#include +#include +#include + +struct quant_option { + std::string name; + llama_ftype ftype; + std::string desc; +}; + +static const std::vector QUANT_OPTIONS = { + { + "Q4_0", + LLAMA_FTYPE_MOSTLY_Q4_0, + " 3.50G, +0.2499 ppl @ 7B - small, very high quality loss - legacy, prefer using Q3_K_M", + }, + { + "Q4_1", + LLAMA_FTYPE_MOSTLY_Q4_1, + " 3.90G, +0.1846 ppl @ 7B - small, substantial quality loss - legacy, prefer using Q3_K_L", + }, + { + "Q5_0", + LLAMA_FTYPE_MOSTLY_Q5_0, + " 4.30G, +0.0796 ppl @ 7B - medium, balanced quality - legacy, prefer using Q4_K_M", + }, + { + "Q5_1", + LLAMA_FTYPE_MOSTLY_Q5_1, + " 4.70G, +0.0415 ppl @ 7B - medium, low quality loss - legacy, prefer using Q5_K_M", + }, +#ifdef GGML_USE_K_QUANTS + { + "Q2_K", + LLAMA_FTYPE_MOSTLY_Q2_K, + " 2.67G, +0.8698 ppl @ 7B - smallest, extreme quality loss - not recommended", + }, + { + "Q3_K", + LLAMA_FTYPE_MOSTLY_Q3_K_M, + "alias for Q3_K_M" + }, + { + "Q3_K_S", + LLAMA_FTYPE_MOSTLY_Q3_K_S, + " 2.75G, +0.5505 ppl @ 7B - very small, very high quality loss", + }, + { + "Q3_K_M", + LLAMA_FTYPE_MOSTLY_Q3_K_M, + " 3.06G, +0.2437 ppl @ 7B - very small, very high quality loss", + }, + { + "Q3_K_L", + LLAMA_FTYPE_MOSTLY_Q3_K_L, + " 3.35G, +0.1803 ppl @ 7B - small, substantial quality loss", + }, + { + "Q4_K", + LLAMA_FTYPE_MOSTLY_Q4_K_M, + "alias for Q4_K_M", + }, + { + "Q4_K_S", + LLAMA_FTYPE_MOSTLY_Q4_K_S, + " 3.56G, +0.1149 ppl @ 7B - small, significant quality loss", + }, + { + "Q4_K_M", + LLAMA_FTYPE_MOSTLY_Q4_K_M, + " 3.80G, +0.0535 ppl @ 7B - medium, balanced quality - *recommended*", + }, + { + "Q5_K", + LLAMA_FTYPE_MOSTLY_Q5_K_M, + "alias for Q5_K_M", + }, + { + "Q5_K_S", + LLAMA_FTYPE_MOSTLY_Q5_K_S, + " 4.33G, +0.0353 ppl @ 7B - large, low quality loss - *recommended*", + }, + { + "Q5_K_M", + LLAMA_FTYPE_MOSTLY_Q5_K_M, + " 4.45G, +0.0142 ppl @ 7B - large, very low quality loss - *recommended*", + }, + { + "Q6_K", + LLAMA_FTYPE_MOSTLY_Q6_K, + " 5.15G, +0.0044 ppl @ 7B - very large, extremely low quality loss", + }, +#endif + { + "Q8_0", + LLAMA_FTYPE_MOSTLY_Q8_0, + " 6.70G, +0.0004 ppl @ 7B - very large, extremely low quality loss - not recommended", + }, + { + "F16", + LLAMA_FTYPE_MOSTLY_F16, + "13.00G @ 7B - extremely large, virtually no quality loss - not recommended", + }, + { + "F32", + LLAMA_FTYPE_ALL_F32, + "26.00G @ 7B - absolutely huge, lossless - not recommended", + }, +}; + + +bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) { + std::string ftype_str; + + for (auto ch : ftype_str_in) { + ftype_str.push_back(std::toupper(ch)); + } + for (auto & it : QUANT_OPTIONS) { + if (it.name == ftype_str) { + ftype = it.ftype; + ftype_str_out = it.name; + return true; + } + } + try { + int ftype_int = std::stoi(ftype_str); + for (auto & it : QUANT_OPTIONS) { + if (it.ftype == ftype_int) { + ftype = it.ftype; + ftype_str_out = it.name; + return true; + } + } + } + catch (...) { + // stoi failed + } + return false; +} + +// usage: +// ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads] +// +void usage(const char * executable) { + fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.bin [model-quant.bin] type [nthreads]\n\n", executable); + fprintf(stderr, " --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n"); + fprintf(stderr, " --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n"); + fprintf(stderr, "\nAllowed quantization types:\n"); + for (auto & it : QUANT_OPTIONS) { + printf(" %2d or %-6s : %s\n", it.ftype, it.name.c_str(), it.desc.c_str()); + } + exit(1); +} + +int main(int argc, char ** argv) { + if (argc < 3) { + usage(argv[0]); + } + + llama_model_quantize_params params = llama_model_quantize_default_params(); + + int arg_idx = 1; + + for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) { + if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) { + params.quantize_output_tensor = false; + } else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) { + params.allow_requantize = true; + } else { + usage(argv[0]); + } + } + + if (argc - arg_idx < 3) { + usage(argv[0]); + } + + llama_init_backend(); + + // parse command line arguments + const std::string fname_inp = argv[arg_idx]; + arg_idx++; + std::string fname_out; + + std::string ftype_str; + if (try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) { + std::string fpath; + const size_t pos = fname_inp.find_last_of('/'); + if (pos != std::string::npos) { + fpath = fname_inp.substr(0, pos + 1); + } + // export as [inp path]/ggml-model-[ftype].bin + fname_out = fpath + "ggml-model-" + ftype_str + ".bin"; + arg_idx++; + } + else { + fname_out = argv[arg_idx]; + arg_idx++; + + if (argc <= arg_idx) { + fprintf(stderr, "%s: missing ftype\n", __func__); + return 1; + } + if (!try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) { + fprintf(stderr, "%s: invalid ftype '%s'\n", __func__, argv[3]); + return 1; + } + arg_idx++; + } + + // parse nthreads + if (argc > arg_idx) { + try { + params.nthread = std::stoi(argv[arg_idx]); + } + catch (const std::exception & e) { + fprintf(stderr, "%s: invalid nthread '%s' (%s)\n", __func__, argv[arg_idx], e.what()); + return 1; + } + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + fprintf(stderr, "%s: quantizing '%s' to '%s' as %s", __func__, fname_inp.c_str(), fname_out.c_str(), ftype_str.c_str()); + if (params.nthread > 0) { + fprintf(stderr, " using %d threads", params.nthread); + } + fprintf(stderr, "\n"); + + const int64_t t_main_start_us = llama_time_us(); + + int64_t t_quantize_us = 0; + + // load the model + { + const int64_t t_start_us = llama_time_us(); + + if (llama_model_quantize(fname_inp.c_str(), fname_out.c_str(), ¶ms)) { + fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str()); + return 1; + } + + t_quantize_us = llama_time_us() - t_start_us; + } + + // report timing + { + const int64_t t_main_end_us = llama_time_us(); + + printf("\n"); + printf("%s: quantize time = %8.2f ms\n", __func__, t_quantize_us/1000.0); + printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0); + } + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/reason-act.sh b/ctransformers/models/submodules/ggllm.cpp/examples/reason-act.sh new file mode 100644 index 0000000000000000000000000000000000000000..e7fe655dbcea3a724f575abc16a8ef6de8197b5f --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/reason-act.sh @@ -0,0 +1,17 @@ + +#!/bin/bash + +cd `dirname $0` +cd .. + +# get -m model parameter otherwise defer to default +if [ "$1" == "-m" ]; then + MODEL="-m $2 " +fi + +./main $MODEL --color \ + -f ./prompts/reason-act.txt \ + -i --interactive-first \ + --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7 -c 2048 \ + -r "Question:" -r "Observation:" --in-prefix " " \ + -n -1 diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/save-load-state/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/save-load-state/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..08dbe5c2b3edf12b27d8de70cb2c12601e432f42 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/save-load-state/CMakeLists.txt @@ -0,0 +1,7 @@ +set(TARGET save-load-state) +add_executable(${TARGET} save-load-state.cpp) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/save-load-state/save-load-state.cpp b/ctransformers/models/submodules/ggllm.cpp/examples/save-load-state/save-load-state.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da4d37ad03de757fb1970d4447687d491ad3e79c --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/save-load-state/save-load-state.cpp @@ -0,0 +1,149 @@ +#include "common.h" +#include "llama.h" +#include "build-info.h" + +#include +#include +#include + +int main(int argc, char ** argv) { + gpt_params params; + params.seed = 42; + params.n_threads = 4; + params.repeat_last_n = 64; + params.prompt = "The quick brown fox"; + + if (gpt_params_parse(argc, argv, params) == false) { + return 1; + } + + fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); + + if (params.n_predict < 0) { + params.n_predict = 16; + } + + auto lparams = llama_context_default_params(); + + lparams.n_ctx = params.n_ctx; + lparams.seed = params.seed; + lparams.f16_kv = params.memory_f16; + lparams.use_mmap = params.use_mmap; + lparams.use_mlock = params.use_mlock; + + auto n_past = 0; + auto last_n_tokens_data = std::vector(params.repeat_last_n, 0); + + // init + auto ctx = llama_init_from_file(params.model.c_str(), lparams); + auto tokens = std::vector(params.n_ctx); + auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), int(tokens.size()), true); + + if (n_prompt_tokens < 1) { + fprintf(stderr, "%s : failed to tokenize prompt\n", __func__); + return 1; + } + + // evaluate prompt + llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads); + + last_n_tokens_data.insert(last_n_tokens_data.end(), tokens.data(), tokens.data() + n_prompt_tokens); + n_past += n_prompt_tokens; + + const size_t state_size = llama_get_state_size(ctx); + uint8_t * state_mem = new uint8_t[state_size]; + + // Save state (rng, logits, embedding and kv_cache) to file + { + FILE *fp_write = fopen("dump_state.bin", "wb"); + llama_copy_state_data(ctx, state_mem); // could also copy directly to memory mapped file + fwrite(state_mem, 1, state_size, fp_write); + fclose(fp_write); + } + + // save state (last tokens) + const auto last_n_tokens_data_saved = std::vector(last_n_tokens_data); + const auto n_past_saved = n_past; + + // first run + printf("\n%s", params.prompt.c_str()); + + for (auto i = 0; i < params.n_predict; i++) { + auto logits = llama_get_logits(ctx); + auto n_vocab = llama_n_vocab(ctx); + std::vector candidates; + candidates.reserve(n_vocab); + for (llama_token token_id = 0; token_id < n_vocab; token_id++) { + candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f}); + } + llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false }; + auto next_token = llama_sample_token(ctx, &candidates_p); + auto next_token_str = llama_token_to_str(ctx, next_token); + last_n_tokens_data.push_back(next_token); + + printf("%s", next_token_str); + if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads)) { + fprintf(stderr, "\n%s : failed to evaluate\n", __func__); + return 1; + } + n_past += 1; + } + + printf("\n\n"); + + // free old model + llama_free(ctx); + + // load new model + auto ctx2 = llama_init_from_file(params.model.c_str(), lparams); + + // Load state (rng, logits, embedding and kv_cache) from file + { + FILE *fp_read = fopen("dump_state.bin", "rb"); + if (state_size != llama_get_state_size(ctx2)) { + fprintf(stderr, "\n%s : failed to validate state size\n", __func__); + return 1; + } + + const size_t ret = fread(state_mem, 1, state_size, fp_read); + if (ret != state_size) { + fprintf(stderr, "\n%s : failed to read state\n", __func__); + return 1; + } + + llama_set_state_data(ctx2, state_mem); // could also read directly from memory mapped file + fclose(fp_read); + } + + delete[] state_mem; + + // restore state (last tokens) + last_n_tokens_data = last_n_tokens_data_saved; + n_past = n_past_saved; + + // second run + for (auto i = 0; i < params.n_predict; i++) { + auto logits = llama_get_logits(ctx2); + auto n_vocab = llama_n_vocab(ctx2); + std::vector candidates; + candidates.reserve(n_vocab); + for (llama_token token_id = 0; token_id < n_vocab; token_id++) { + candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f}); + } + llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false }; + auto next_token = llama_sample_token(ctx2, &candidates_p); + auto next_token_str = llama_token_to_str(ctx2, next_token); + last_n_tokens_data.push_back(next_token); + + printf("%s", next_token_str); + if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads)) { + fprintf(stderr, "\n%s : failed to evaluate\n", __func__); + return 1; + } + n_past += 1; + } + + printf("\n\n"); + + return 0; +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/server/CMakeLists.txt b/ctransformers/models/submodules/ggllm.cpp/examples/server/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..07ba76ad35bbdb558beb3ecc0930b910c9b45c43 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/server/CMakeLists.txt @@ -0,0 +1,12 @@ +set(TARGET server) +option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +add_executable(${TARGET} server.cpp json.hpp httplib.h) +target_compile_definitions(${TARGET} PRIVATE + SERVER_VERBOSE=$ +) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) +if(TARGET BUILD_INFO) + add_dependencies(${TARGET} BUILD_INFO) +endif() diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/server/README.md b/ctransformers/models/submodules/ggllm.cpp/examples/server/README.md new file mode 100644 index 0000000000000000000000000000000000000000..474a28b20018ff5308b4a95551d69fc203e0a5ff --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/server/README.md @@ -0,0 +1,183 @@ +# llama.cpp/example/server + +This example demonstrates a simple HTTP API server to interact with llama.cpp. + +Command line options: + +- `--threads N`, `-t N`: Set the number of threads to use during computation. +- `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`). +- `-m ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses. +- `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. +- `-ngl N`, `--n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance. +- `-mg i, --main-gpu i`: When using multiple GPUs this option controls which GPU is used for small tensors for which the overhead of splitting the computation across all GPUs is not worthwhile. The GPU in question will use slightly more VRAM to store a scratch buffer for temporary results. By default GPU 0 is used. Requires cuBLAS. +- `-ts SPLIT, --tensor-split SPLIT`: When using multiple GPUs this option controls how large tensors should be split across all GPUs. `SPLIT` is a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order. For example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1. By default the data is split in proportion to VRAM but this may not be optimal for performance. Requires cuBLAS. +- `-lv, --low-vram`: Do not allocate a VRAM scratch buffer for holding temporary results. Reduces VRAM usage at the cost of performance, particularly prompt processing speed. Requires cuBLAS. +- `-b N`, `--batch-size N`: Set the batch size for prompt processing. Default: `512`. +- `--memory-f32`: Use 32-bit floats instead of 16-bit floats for memory key+value. Not recommended. +- `--mlock`: Lock the model in memory, preventing it from being swapped out when memory-mapped. +- `--no-mmap`: Do not memory-map the model. By default, models are mapped into memory, which allows the system to load only the necessary parts of the model as needed. +- `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains. +- `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation. +- `-to N`, `--timeout N`: Server read/write timeout in seconds. Default `600`. +- `--host`: Set the hostname or ip address to listen. Default `127.0.0.1`. +- `--port`: Set the port to listen. Default: `8080`. + +## Build + +Build llama.cpp with server from repository root with either make or CMake. + +- Using `make`: + + ```bash + LLAMA_BUILD_SERVER=1 make + ``` + +- Using `CMake`: + + ```bash + mkdir build-server + cd build-server + cmake -DLLAMA_BUILD_SERVER=ON .. + cmake --build . --config Release + ``` + +## Quick Start + +To get started right away, run the following command, making sure to use the correct path for the model you have: + +### Unix-based systems (Linux, macOS, etc.): + +```bash +./server -m models/7B/ggml-model.bin -c 2048 +``` + +### Windows: + +```powershell +server.exe -m models\7B\ggml-model.bin -c 2048 +``` + +The above command will start a server that by default listens on `127.0.0.1:8080`. +You can consume the endpoints with Postman or NodeJS with axios library. + +## Testing with CURL + +Using [curl](https://curl.se/). On Windows `curl.exe` should be available in the base OS. + +```sh +curl --request POST \ + --url http://localhost:8080/completion \ + --data '{"prompt": "Building a website can be done in 10 simple steps:","n_predict": 128}' +``` + +## Node JS Test + +You need to have [Node.js](https://nodejs.org/en) installed. + +```bash +mkdir llama-client +cd llama-client +npm init +npm install axios +``` + +Create a index.js file and put inside this: + +```javascript +const axios = require("axios"); + +const prompt = `Building a website can be done in 10 simple steps:`; + +async function Test() { + let result = await axios.post("http://127.0.0.1:8080/completion", { + prompt, + n_predict: 512, + }); + + // the response is received until completion finish + console.log(result.data.content); +} + +Test(); +``` + +And run it: + +```bash +node . +``` + +## API Endpoints + +- **POST** `/completion`: Given a prompt, it returns the predicted completion. + + *Options:* + + `temperature`: Adjust the randomness of the generated text (default: 0.8). + + `top_k`: Limit the next token selection to the K most probable tokens (default: 40). + + `top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.9). + + `n_predict`: Set the number of tokens to predict when generating text. **Note:** May exceed the set limit slightly if the last token is a partial multibyte character. (default: 128, -1 = infinity). + + `n_keep`: Specify the number of tokens from the initial prompt to retain when the model resets its internal context. + By default, this value is set to 0 (meaning no tokens are kept). Use `-1` to retain all tokens from the initial prompt. + + `stream`: It allows receiving each predicted token in real-time instead of waiting for the completion to finish. To enable this, set to `true`. + + `prompt`: Provide a prompt. Internally, the prompt is compared, and it detects if a part has already been evaluated, and the remaining part will be evaluate. + + `stop`: Specify a JSON array of stopping strings. + These words will not be included in the completion, so make sure to add them to the prompt for the next iteration (default: []). + + `tfs_z`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled). + + `typical_p`: Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled). + + `repeat_penalty`: Control the repetition of token sequences in the generated text (default: 1.1). + + `repeat_last_n`: Last n tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size). + + `penalize_nl`: Penalize newline tokens when applying the repeat penalty (default: true). + + `presence_penalty`: Repeat alpha presence penalty (default: 0.0, 0.0 = disabled). + + `frequency_penalty`: Repeat alpha frequency penalty (default: 0.0, 0.0 = disabled); + + `mirostat`: Enable Mirostat sampling, controlling perplexity during text generation (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0). + + `mirostat_tau`: Set the Mirostat target entropy, parameter tau (default: 5.0). + + `mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1). + + `seed`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed). + + `ignore_eos`: Ignore end of stream token and continue generating (default: false). + + `logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood. Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced (default: []). + +- **POST** `/tokenize`: Tokenize a given text. + + *Options:* + + `content`: Set the text to tokenize. + +## More examples + +### Interactive mode + +Check the sample in [chat.mjs](chat.mjs). +Run with NodeJS version 16 or later: + +```sh +node chat.mjs +``` + +Another sample in [chat.sh](chat.sh). +Requires [bash](https://www.gnu.org/software/bash/), [curl](https://curl.se) and [jq](https://jqlang.github.io/jq/). +Run with bash: + +```sh +bash chat.sh +``` diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/server/chat.mjs b/ctransformers/models/submodules/ggllm.cpp/examples/server/chat.mjs new file mode 100644 index 0000000000000000000000000000000000000000..8269e2592733b55a901ee461d9065c39a794cf21 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/server/chat.mjs @@ -0,0 +1,89 @@ +import * as readline from 'node:readline' +import { stdin, stdout } from 'node:process' + +const API_URL = 'http://127.0.0.1:8080' + +const chat = [ + { + human: "Hello, Assistant.", + assistant: "Hello. How may I help you today?" + }, + { + human: "Please tell me the largest city in Europe.", + assistant: "Sure. The largest city in Europe is Moscow, the capital of Russia." + }, +] + +const instruction = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.` + +function format_prompt(question) { + return `${instruction}\n${ + chat.map(m =>`### Human: ${m.human}\n### Assistant: ${m.assistant}`).join("\n") + }\n### Human: ${question}\n### Assistant:` +} + +async function tokenize(content) { + const result = await fetch(`${API_URL}/tokenize`, { + method: 'POST', + body: JSON.stringify({ content }) + }) + + if (!result.ok) { + return [] + } + + return await result.json().tokens +} + +const n_keep = await tokenize(instruction).length + +async function chat_completion(question) { + const result = await fetch(`${API_URL}/completion`, { + method: 'POST', + body: JSON.stringify({ + prompt: format_prompt(question), + temperature: 0.2, + top_k: 40, + top_p: 0.9, + n_keep: n_keep, + n_predict: 256, + stop: ["\n### Human:"], // stop completion after generating this + stream: true, + }) + }) + + if (!result.ok) { + return + } + + let answer = '' + + for await (var chunk of result.body) { + const t = Buffer.from(chunk).toString('utf8') + if (t.startsWith('data: ')) { + const message = JSON.parse(t.substring(6)) + answer += message.content + process.stdout.write(message.content) + if (message.stop) { + if (message.truncated) { + chat.shift() + } + break + } + } + } + + process.stdout.write('\n') + chat.push({ human: question, assistant: answer.trimStart() }) +} + +const rl = readline.createInterface({ input: stdin, output: stdout }); + +const readlineQuestion = (rl, query, options) => new Promise((resolve, reject) => { + rl.question(query, options, resolve) +}); + +while(true) { + const question = await readlineQuestion(rl, '> ') + await chat_completion(question) +} diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/server/chat.sh b/ctransformers/models/submodules/ggllm.cpp/examples/server/chat.sh new file mode 100644 index 0000000000000000000000000000000000000000..a89f8e908c403b22b656471d95261812fbcffc87 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/server/chat.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +API_URL="${API_URL:-http://127.0.0.1:8080}" + +CHAT=( + "Hello, Assistant." + "Hello. How may I help you today?" + "Please tell me the largest city in Europe." + "Sure. The largest city in Europe is Moscow, the capital of Russia." +) + +INSTRUCTION="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions." + +trim() { + shopt -s extglob + set -- "${1##+([[:space:]])}" + printf "%s" "${1%%+([[:space:]])}" +} + +trim_trailing() { + shopt -s extglob + printf "%s" "${1%%+([[:space:]])}" +} + +format_prompt() { + echo -n "${INSTRUCTION}" + printf "\n### Human: %s\n### Assistant: %s" "${CHAT[@]}" "$1" +} + +tokenize() { + curl \ + --silent \ + --request POST \ + --url "${API_URL}/tokenize" \ + --data-raw "$(jq -ns --arg content "$1" '{content:$content}')" \ + | jq '.tokens[]' +} + +N_KEEP=$(tokenize "${INSTRUCTION}" | wc -l) + +chat_completion() { + PROMPT="$(trim_trailing "$(format_prompt "$1")")" + DATA="$(echo -n "$PROMPT" | jq -Rs --argjson n_keep $N_KEEP '{ + prompt: ., + temperature: 0.2, + top_k: 40, + top_p: 0.9, + n_keep: $n_keep, + n_predict: 256, + stop: ["\n### Human:"], + stream: true + }')" + + ANSWER='' + + while IFS= read -r LINE; do + if [[ $LINE = data:* ]]; then + CONTENT="$(echo "${LINE:5}" | jq -r '.content')" + printf "%s" "${CONTENT}" + ANSWER+="${CONTENT}" + fi + done < <(curl \ + --silent \ + --no-buffer \ + --request POST \ + --url "${API_URL}/completion" \ + --data-raw "${DATA}") + + printf "\n" + + CHAT+=("$1" "$(trim "$ANSWER")") +} + +while true; do + read -r -e -p "> " QUESTION + chat_completion "${QUESTION}" +done diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/server/httplib.h b/ctransformers/models/submodules/ggllm.cpp/examples/server/httplib.h new file mode 100644 index 0000000000000000000000000000000000000000..28746000cddceb5ecb8d8b56e7a25ab08ea7ee4f --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/server/httplib.h @@ -0,0 +1,8794 @@ +// +// httplib.h +// +// Copyright (c) 2023 Yuji Hirose. All rights reserved. +// MIT License +// + +#ifndef CPPHTTPLIB_HTTPLIB_H +#define CPPHTTPLIB_HTTPLIB_H + +#define CPPHTTPLIB_VERSION "0.12.2" + +/* + * Configuration + */ + +#ifndef CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND +#define CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND 5 +#endif + +#ifndef CPPHTTPLIB_KEEPALIVE_MAX_COUNT +#define CPPHTTPLIB_KEEPALIVE_MAX_COUNT 5 +#endif + +#ifndef CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND +#define CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND 300 +#endif + +#ifndef CPPHTTPLIB_CONNECTION_TIMEOUT_USECOND +#define CPPHTTPLIB_CONNECTION_TIMEOUT_USECOND 0 +#endif + +#ifndef CPPHTTPLIB_READ_TIMEOUT_SECOND +#define CPPHTTPLIB_READ_TIMEOUT_SECOND 5 +#endif + +#ifndef CPPHTTPLIB_READ_TIMEOUT_USECOND +#define CPPHTTPLIB_READ_TIMEOUT_USECOND 0 +#endif + +#ifndef CPPHTTPLIB_WRITE_TIMEOUT_SECOND +#define CPPHTTPLIB_WRITE_TIMEOUT_SECOND 5 +#endif + +#ifndef CPPHTTPLIB_WRITE_TIMEOUT_USECOND +#define CPPHTTPLIB_WRITE_TIMEOUT_USECOND 0 +#endif + +#ifndef CPPHTTPLIB_IDLE_INTERVAL_SECOND +#define CPPHTTPLIB_IDLE_INTERVAL_SECOND 0 +#endif + +#ifndef CPPHTTPLIB_IDLE_INTERVAL_USECOND +#ifdef _WIN32 +#define CPPHTTPLIB_IDLE_INTERVAL_USECOND 10000 +#else +#define CPPHTTPLIB_IDLE_INTERVAL_USECOND 0 +#endif +#endif + +#ifndef CPPHTTPLIB_REQUEST_URI_MAX_LENGTH +#define CPPHTTPLIB_REQUEST_URI_MAX_LENGTH 8192 +#endif + +#ifndef CPPHTTPLIB_HEADER_MAX_LENGTH +#define CPPHTTPLIB_HEADER_MAX_LENGTH 8192 +#endif + +#ifndef CPPHTTPLIB_REDIRECT_MAX_COUNT +#define CPPHTTPLIB_REDIRECT_MAX_COUNT 20 +#endif + +#ifndef CPPHTTPLIB_MULTIPART_FORM_DATA_FILE_MAX_COUNT +#define CPPHTTPLIB_MULTIPART_FORM_DATA_FILE_MAX_COUNT 1024 +#endif + +#ifndef CPPHTTPLIB_PAYLOAD_MAX_LENGTH +#define CPPHTTPLIB_PAYLOAD_MAX_LENGTH ((std::numeric_limits::max)()) +#endif + +#ifndef CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH +#define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 8192 +#endif + +#ifndef CPPHTTPLIB_TCP_NODELAY +#define CPPHTTPLIB_TCP_NODELAY false +#endif + +#ifndef CPPHTTPLIB_RECV_BUFSIZ +#define CPPHTTPLIB_RECV_BUFSIZ size_t(4096u) +#endif + +#ifndef CPPHTTPLIB_COMPRESSION_BUFSIZ +#define CPPHTTPLIB_COMPRESSION_BUFSIZ size_t(16384u) +#endif + +#ifndef CPPHTTPLIB_THREAD_POOL_COUNT +#define CPPHTTPLIB_THREAD_POOL_COUNT \ + ((std::max)(8u, std::thread::hardware_concurrency() > 0 \ + ? std::thread::hardware_concurrency() - 1 \ + : 0)) +#endif + +#ifndef CPPHTTPLIB_RECV_FLAGS +#define CPPHTTPLIB_RECV_FLAGS 0 +#endif + +#ifndef CPPHTTPLIB_SEND_FLAGS +#define CPPHTTPLIB_SEND_FLAGS 0 +#endif + +#ifndef CPPHTTPLIB_LISTEN_BACKLOG +#define CPPHTTPLIB_LISTEN_BACKLOG 5 +#endif + +/* + * Headers + */ + +#ifdef _WIN32 +#ifndef _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_WARNINGS +#endif //_CRT_SECURE_NO_WARNINGS + +#ifndef _CRT_NONSTDC_NO_DEPRECATE +#define _CRT_NONSTDC_NO_DEPRECATE +#endif //_CRT_NONSTDC_NO_DEPRECATE + +#if defined(_MSC_VER) +#if _MSC_VER < 1900 +#error Sorry, Visual Studio versions prior to 2015 are not supported +#endif + +#pragma comment(lib, "ws2_32.lib") + +#ifdef _WIN64 +using ssize_t = __int64; +#else +using ssize_t = long; +#endif +#endif // _MSC_VER + +#ifndef S_ISREG +#define S_ISREG(m) (((m)&S_IFREG) == S_IFREG) +#endif // S_ISREG + +#ifndef S_ISDIR +#define S_ISDIR(m) (((m)&S_IFDIR) == S_IFDIR) +#endif // S_ISDIR + +#ifndef NOMINMAX +#define NOMINMAX +#endif // NOMINMAX + +#include +#include +#include + +#ifndef WSA_FLAG_NO_HANDLE_INHERIT +#define WSA_FLAG_NO_HANDLE_INHERIT 0x80 +#endif + +#ifndef strcasecmp +#define strcasecmp _stricmp +#endif // strcasecmp + +using socket_t = SOCKET; +#ifdef CPPHTTPLIB_USE_POLL +#define poll(fds, nfds, timeout) WSAPoll(fds, nfds, timeout) +#endif + +#else // not _WIN32 + +#include +#ifndef _AIX +#include +#endif +#include +#include +#include +#ifdef __linux__ +#include +#endif +#include +#ifdef CPPHTTPLIB_USE_POLL +#include +#endif +#include +#include +#include +#include +#include +#include + +using socket_t = int; +#ifndef INVALID_SOCKET +#define INVALID_SOCKET (-1) +#endif +#endif //_WIN32 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef _WIN32 +#include + +// these are defined in wincrypt.h and it breaks compilation if BoringSSL is +// used +#undef X509_NAME +#undef X509_CERT_PAIR +#undef X509_EXTENSIONS +#undef PKCS7_SIGNER_INFO + +#ifdef _MSC_VER +#pragma comment(lib, "crypt32.lib") +#pragma comment(lib, "cryptui.lib") +#endif +#elif defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) && defined(__APPLE__) +#include +#if TARGET_OS_OSX +#include +#include +#endif // TARGET_OS_OSX +#endif // _WIN32 + +#include +#include +#include +#include + +#if defined(_WIN32) && defined(OPENSSL_USE_APPLINK) +#include +#endif + +#include +#include + +#if OPENSSL_VERSION_NUMBER < 0x1010100fL +#error Sorry, OpenSSL versions prior to 1.1.1 are not supported +#elif OPENSSL_VERSION_NUMBER < 0x30000000L +#define SSL_get1_peer_certificate SSL_get_peer_certificate +#endif + +#endif + +#ifdef CPPHTTPLIB_ZLIB_SUPPORT +#include +#endif + +#ifdef CPPHTTPLIB_BROTLI_SUPPORT +#include +#include +#endif + +/* + * Declaration + */ +namespace httplib { + +namespace detail { + +/* + * Backport std::make_unique from C++14. + * + * NOTE: This code came up with the following stackoverflow post: + * https://stackoverflow.com/questions/10149840/c-arrays-and-make-unique + * + */ + +template +typename std::enable_if::value, std::unique_ptr>::type +make_unique(Args &&...args) { + return std::unique_ptr(new T(std::forward(args)...)); +} + +template +typename std::enable_if::value, std::unique_ptr>::type +make_unique(std::size_t n) { + typedef typename std::remove_extent::type RT; + return std::unique_ptr(new RT[n]); +} + +struct ci { + bool operator()(const std::string &s1, const std::string &s2) const { + return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), + s2.end(), + [](unsigned char c1, unsigned char c2) { + return ::tolower(c1) < ::tolower(c2); + }); + } +}; + +// This is based on +// "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189". + +struct scope_exit { + explicit scope_exit(std::function &&f) + : exit_function(std::move(f)), execute_on_destruction{true} {} + + scope_exit(scope_exit &&rhs) + : exit_function(std::move(rhs.exit_function)), + execute_on_destruction{rhs.execute_on_destruction} { + rhs.release(); + } + + ~scope_exit() { + if (execute_on_destruction) { this->exit_function(); } + } + + void release() { this->execute_on_destruction = false; } + +private: + scope_exit(const scope_exit &) = delete; + void operator=(const scope_exit &) = delete; + scope_exit &operator=(scope_exit &&) = delete; + + std::function exit_function; + bool execute_on_destruction; +}; + +} // namespace detail + +using Headers = std::multimap; + +using Params = std::multimap; +using Match = std::smatch; + +using Progress = std::function; + +struct Response; +using ResponseHandler = std::function; + +struct MultipartFormData { + std::string name; + std::string content; + std::string filename; + std::string content_type; +}; +using MultipartFormDataItems = std::vector; +using MultipartFormDataMap = std::multimap; + +class DataSink { +public: + DataSink() : os(&sb_), sb_(*this) {} + + DataSink(const DataSink &) = delete; + DataSink &operator=(const DataSink &) = delete; + DataSink(DataSink &&) = delete; + DataSink &operator=(DataSink &&) = delete; + + std::function write; + std::function done; + std::function done_with_trailer; + std::ostream os; + +private: + class data_sink_streambuf : public std::streambuf { + public: + explicit data_sink_streambuf(DataSink &sink) : sink_(sink) {} + + protected: + std::streamsize xsputn(const char *s, std::streamsize n) { + sink_.write(s, static_cast(n)); + return n; + } + + private: + DataSink &sink_; + }; + + data_sink_streambuf sb_; +}; + +using ContentProvider = + std::function; + +using ContentProviderWithoutLength = + std::function; + +using ContentProviderResourceReleaser = std::function; + +struct MultipartFormDataProvider { + std::string name; + ContentProviderWithoutLength provider; + std::string filename; + std::string content_type; +}; +using MultipartFormDataProviderItems = std::vector; + +using ContentReceiverWithProgress = + std::function; + +using ContentReceiver = + std::function; + +using MultipartContentHeader = + std::function; + +class ContentReader { +public: + using Reader = std::function; + using MultipartReader = std::function; + + ContentReader(Reader reader, MultipartReader multipart_reader) + : reader_(std::move(reader)), + multipart_reader_(std::move(multipart_reader)) {} + + bool operator()(MultipartContentHeader header, + ContentReceiver receiver) const { + return multipart_reader_(std::move(header), std::move(receiver)); + } + + bool operator()(ContentReceiver receiver) const { + return reader_(std::move(receiver)); + } + + Reader reader_; + MultipartReader multipart_reader_; +}; + +using Range = std::pair; +using Ranges = std::vector; + +struct Request { + std::string method; + std::string path; + Headers headers; + std::string body; + + std::string remote_addr; + int remote_port = -1; + std::string local_addr; + int local_port = -1; + + // for server + std::string version; + std::string target; + Params params; + MultipartFormDataMap files; + Ranges ranges; + Match matches; + + // for client + ResponseHandler response_handler; + ContentReceiverWithProgress content_receiver; + Progress progress; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + const SSL *ssl = nullptr; +#endif + + bool has_header(const std::string &key) const; + std::string get_header_value(const std::string &key, size_t id = 0) const; + template + T get_header_value(const std::string &key, size_t id = 0) const; + size_t get_header_value_count(const std::string &key) const; + void set_header(const std::string &key, const std::string &val); + + bool has_param(const std::string &key) const; + std::string get_param_value(const std::string &key, size_t id = 0) const; + size_t get_param_value_count(const std::string &key) const; + + bool is_multipart_form_data() const; + + bool has_file(const std::string &key) const; + MultipartFormData get_file_value(const std::string &key) const; + std::vector get_file_values(const std::string &key) const; + + // private members... + size_t redirect_count_ = CPPHTTPLIB_REDIRECT_MAX_COUNT; + size_t content_length_ = 0; + ContentProvider content_provider_; + bool is_chunked_content_provider_ = false; + size_t authorization_count_ = 0; +}; + +struct Response { + std::string version; + int status = -1; + std::string reason; + Headers headers; + std::string body; + std::string location; // Redirect location + + bool has_header(const std::string &key) const; + std::string get_header_value(const std::string &key, size_t id = 0) const; + template + T get_header_value(const std::string &key, size_t id = 0) const; + size_t get_header_value_count(const std::string &key) const; + void set_header(const std::string &key, const std::string &val); + + void set_redirect(const std::string &url, int status = 302); + void set_content(const char *s, size_t n, const std::string &content_type); + void set_content(const std::string &s, const std::string &content_type); + + void set_content_provider( + size_t length, const std::string &content_type, ContentProvider provider, + ContentProviderResourceReleaser resource_releaser = nullptr); + + void set_content_provider( + const std::string &content_type, ContentProviderWithoutLength provider, + ContentProviderResourceReleaser resource_releaser = nullptr); + + void set_chunked_content_provider( + const std::string &content_type, ContentProviderWithoutLength provider, + ContentProviderResourceReleaser resource_releaser = nullptr); + + Response() = default; + Response(const Response &) = default; + Response &operator=(const Response &) = default; + Response(Response &&) = default; + Response &operator=(Response &&) = default; + ~Response() { + if (content_provider_resource_releaser_) { + content_provider_resource_releaser_(content_provider_success_); + } + } + + // private members... + size_t content_length_ = 0; + ContentProvider content_provider_; + ContentProviderResourceReleaser content_provider_resource_releaser_; + bool is_chunked_content_provider_ = false; + bool content_provider_success_ = false; +}; + +class Stream { +public: + virtual ~Stream() = default; + + virtual bool is_readable() const = 0; + virtual bool is_writable() const = 0; + + virtual ssize_t read(char *ptr, size_t size) = 0; + virtual ssize_t write(const char *ptr, size_t size) = 0; + virtual void get_remote_ip_and_port(std::string &ip, int &port) const = 0; + virtual void get_local_ip_and_port(std::string &ip, int &port) const = 0; + virtual socket_t socket() const = 0; + + template + ssize_t write_format(const char *fmt, const Args &...args); + ssize_t write(const char *ptr); + ssize_t write(const std::string &s); +}; + +class TaskQueue { +public: + TaskQueue() = default; + virtual ~TaskQueue() = default; + + virtual void enqueue(std::function fn) = 0; + virtual void shutdown() = 0; + + virtual void on_idle() {} +}; + +class ThreadPool : public TaskQueue { +public: + explicit ThreadPool(size_t n) : shutdown_(false) { + while (n) { + threads_.emplace_back(worker(*this)); + n--; + } + } + + ThreadPool(const ThreadPool &) = delete; + ~ThreadPool() override = default; + + void enqueue(std::function fn) override { + { + std::unique_lock lock(mutex_); + jobs_.push_back(std::move(fn)); + } + + cond_.notify_one(); + } + + void shutdown() override { + // Stop all worker threads... + { + std::unique_lock lock(mutex_); + shutdown_ = true; + } + + cond_.notify_all(); + + // Join... + for (auto &t : threads_) { + t.join(); + } + } + +private: + struct worker { + explicit worker(ThreadPool &pool) : pool_(pool) {} + + void operator()() { + for (;;) { + std::function fn; + { + std::unique_lock lock(pool_.mutex_); + + pool_.cond_.wait( + lock, [&] { return !pool_.jobs_.empty() || pool_.shutdown_; }); + + if (pool_.shutdown_ && pool_.jobs_.empty()) { break; } + + fn = std::move(pool_.jobs_.front()); + pool_.jobs_.pop_front(); + } + + assert(true == static_cast(fn)); + fn(); + } + } + + ThreadPool &pool_; + }; + friend struct worker; + + std::vector threads_; + std::list> jobs_; + + bool shutdown_; + + std::condition_variable cond_; + std::mutex mutex_; +}; + +using Logger = std::function; + +using SocketOptions = std::function; + +void default_socket_options(socket_t sock); + +class Server { +public: + using Handler = std::function; + + using ExceptionHandler = + std::function; + + enum class HandlerResponse { + Handled, + Unhandled, + }; + using HandlerWithResponse = + std::function; + + using HandlerWithContentReader = std::function; + + using Expect100ContinueHandler = + std::function; + + Server(); + + virtual ~Server(); + + virtual bool is_valid() const; + + Server &Get(const std::string &pattern, Handler handler); + Server &Post(const std::string &pattern, Handler handler); + Server &Post(const std::string &pattern, HandlerWithContentReader handler); + Server &Put(const std::string &pattern, Handler handler); + Server &Put(const std::string &pattern, HandlerWithContentReader handler); + Server &Patch(const std::string &pattern, Handler handler); + Server &Patch(const std::string &pattern, HandlerWithContentReader handler); + Server &Delete(const std::string &pattern, Handler handler); + Server &Delete(const std::string &pattern, HandlerWithContentReader handler); + Server &Options(const std::string &pattern, Handler handler); + + bool set_base_dir(const std::string &dir, + const std::string &mount_point = std::string()); + bool set_mount_point(const std::string &mount_point, const std::string &dir, + Headers headers = Headers()); + bool remove_mount_point(const std::string &mount_point); + Server &set_file_extension_and_mimetype_mapping(const std::string &ext, + const std::string &mime); + Server &set_file_request_handler(Handler handler); + + Server &set_error_handler(HandlerWithResponse handler); + Server &set_error_handler(Handler handler); + Server &set_exception_handler(ExceptionHandler handler); + Server &set_pre_routing_handler(HandlerWithResponse handler); + Server &set_post_routing_handler(Handler handler); + + Server &set_expect_100_continue_handler(Expect100ContinueHandler handler); + Server &set_logger(Logger logger); + + Server &set_address_family(int family); + Server &set_tcp_nodelay(bool on); + Server &set_socket_options(SocketOptions socket_options); + + Server &set_default_headers(Headers headers); + + Server &set_keep_alive_max_count(size_t count); + Server &set_keep_alive_timeout(time_t sec); + + Server &set_read_timeout(time_t sec, time_t usec = 0); + template + Server &set_read_timeout(const std::chrono::duration &duration); + + Server &set_write_timeout(time_t sec, time_t usec = 0); + template + Server &set_write_timeout(const std::chrono::duration &duration); + + Server &set_idle_interval(time_t sec, time_t usec = 0); + template + Server &set_idle_interval(const std::chrono::duration &duration); + + Server &set_payload_max_length(size_t length); + + bool bind_to_port(const std::string &host, int port, int socket_flags = 0); + int bind_to_any_port(const std::string &host, int socket_flags = 0); + bool listen_after_bind(); + + bool listen(const std::string &host, int port, int socket_flags = 0); + + bool is_running() const; + void wait_until_ready() const; + void stop(); + + std::function new_task_queue; + +protected: + bool process_request(Stream &strm, bool close_connection, + bool &connection_closed, + const std::function &setup_request); + + std::atomic svr_sock_{INVALID_SOCKET}; + size_t keep_alive_max_count_ = CPPHTTPLIB_KEEPALIVE_MAX_COUNT; + time_t keep_alive_timeout_sec_ = CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND; + time_t read_timeout_sec_ = CPPHTTPLIB_READ_TIMEOUT_SECOND; + time_t read_timeout_usec_ = CPPHTTPLIB_READ_TIMEOUT_USECOND; + time_t write_timeout_sec_ = CPPHTTPLIB_WRITE_TIMEOUT_SECOND; + time_t write_timeout_usec_ = CPPHTTPLIB_WRITE_TIMEOUT_USECOND; + time_t idle_interval_sec_ = CPPHTTPLIB_IDLE_INTERVAL_SECOND; + time_t idle_interval_usec_ = CPPHTTPLIB_IDLE_INTERVAL_USECOND; + size_t payload_max_length_ = CPPHTTPLIB_PAYLOAD_MAX_LENGTH; + +private: + using Handlers = std::vector>; + using HandlersForContentReader = + std::vector>; + + socket_t create_server_socket(const std::string &host, int port, + int socket_flags, + SocketOptions socket_options) const; + int bind_internal(const std::string &host, int port, int socket_flags); + bool listen_internal(); + + bool routing(Request &req, Response &res, Stream &strm); + bool handle_file_request(const Request &req, Response &res, + bool head = false); + bool dispatch_request(Request &req, Response &res, const Handlers &handlers); + bool + dispatch_request_for_content_reader(Request &req, Response &res, + ContentReader content_reader, + const HandlersForContentReader &handlers); + + bool parse_request_line(const char *s, Request &req); + void apply_ranges(const Request &req, Response &res, + std::string &content_type, std::string &boundary); + bool write_response(Stream &strm, bool close_connection, const Request &req, + Response &res); + bool write_response_with_content(Stream &strm, bool close_connection, + const Request &req, Response &res); + bool write_response_core(Stream &strm, bool close_connection, + const Request &req, Response &res, + bool need_apply_ranges); + bool write_content_with_provider(Stream &strm, const Request &req, + Response &res, const std::string &boundary, + const std::string &content_type); + bool read_content(Stream &strm, Request &req, Response &res); + bool + read_content_with_content_receiver(Stream &strm, Request &req, Response &res, + ContentReceiver receiver, + MultipartContentHeader multipart_header, + ContentReceiver multipart_receiver); + bool read_content_core(Stream &strm, Request &req, Response &res, + ContentReceiver receiver, + MultipartContentHeader multipart_header, + ContentReceiver multipart_receiver); + + virtual bool process_and_close_socket(socket_t sock); + + struct MountPointEntry { + std::string mount_point; + std::string base_dir; + Headers headers; + }; + std::vector base_dirs_; + + std::atomic is_running_{false}; + std::atomic done_{false}; + std::map file_extension_and_mimetype_map_; + Handler file_request_handler_; + Handlers get_handlers_; + Handlers post_handlers_; + HandlersForContentReader post_handlers_for_content_reader_; + Handlers put_handlers_; + HandlersForContentReader put_handlers_for_content_reader_; + Handlers patch_handlers_; + HandlersForContentReader patch_handlers_for_content_reader_; + Handlers delete_handlers_; + HandlersForContentReader delete_handlers_for_content_reader_; + Handlers options_handlers_; + HandlerWithResponse error_handler_; + ExceptionHandler exception_handler_; + HandlerWithResponse pre_routing_handler_; + Handler post_routing_handler_; + Logger logger_; + Expect100ContinueHandler expect_100_continue_handler_; + + int address_family_ = AF_UNSPEC; + bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY; + SocketOptions socket_options_ = default_socket_options; + + Headers default_headers_; +}; + +enum class Error { + Success = 0, + Unknown, + Connection, + BindIPAddress, + Read, + Write, + ExceedRedirectCount, + Canceled, + SSLConnection, + SSLLoadingCerts, + SSLServerVerification, + UnsupportedMultipartBoundaryChars, + Compression, + ConnectionTimeout, + + // For internal use only + SSLPeerCouldBeClosed_, +}; + +std::string to_string(const Error error); + +std::ostream &operator<<(std::ostream &os, const Error &obj); + +class Result { +public: + Result(std::unique_ptr &&res, Error err, + Headers &&request_headers = Headers{}) + : res_(std::move(res)), err_(err), + request_headers_(std::move(request_headers)) {} + // Response + operator bool() const { return res_ != nullptr; } + bool operator==(std::nullptr_t) const { return res_ == nullptr; } + bool operator!=(std::nullptr_t) const { return res_ != nullptr; } + const Response &value() const { return *res_; } + Response &value() { return *res_; } + const Response &operator*() const { return *res_; } + Response &operator*() { return *res_; } + const Response *operator->() const { return res_.get(); } + Response *operator->() { return res_.get(); } + + // Error + Error error() const { return err_; } + + // Request Headers + bool has_request_header(const std::string &key) const; + std::string get_request_header_value(const std::string &key, + size_t id = 0) const; + template + T get_request_header_value(const std::string &key, size_t id = 0) const; + size_t get_request_header_value_count(const std::string &key) const; + +private: + std::unique_ptr res_; + Error err_; + Headers request_headers_; +}; + +class ClientImpl { +public: + explicit ClientImpl(const std::string &host); + + explicit ClientImpl(const std::string &host, int port); + + explicit ClientImpl(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path); + + virtual ~ClientImpl(); + + virtual bool is_valid() const; + + Result Get(const std::string &path); + Result Get(const std::string &path, const Headers &headers); + Result Get(const std::string &path, Progress progress); + Result Get(const std::string &path, const Headers &headers, + Progress progress); + Result Get(const std::string &path, ContentReceiver content_receiver); + Result Get(const std::string &path, const Headers &headers, + ContentReceiver content_receiver); + Result Get(const std::string &path, ContentReceiver content_receiver, + Progress progress); + Result Get(const std::string &path, const Headers &headers, + ContentReceiver content_receiver, Progress progress); + Result Get(const std::string &path, ResponseHandler response_handler, + ContentReceiver content_receiver); + Result Get(const std::string &path, const Headers &headers, + ResponseHandler response_handler, + ContentReceiver content_receiver); + Result Get(const std::string &path, ResponseHandler response_handler, + ContentReceiver content_receiver, Progress progress); + Result Get(const std::string &path, const Headers &headers, + ResponseHandler response_handler, ContentReceiver content_receiver, + Progress progress); + + Result Get(const std::string &path, const Params ¶ms, + const Headers &headers, Progress progress = nullptr); + Result Get(const std::string &path, const Params ¶ms, + const Headers &headers, ContentReceiver content_receiver, + Progress progress = nullptr); + Result Get(const std::string &path, const Params ¶ms, + const Headers &headers, ResponseHandler response_handler, + ContentReceiver content_receiver, Progress progress = nullptr); + + Result Head(const std::string &path); + Result Head(const std::string &path, const Headers &headers); + + Result Post(const std::string &path); + Result Post(const std::string &path, const Headers &headers); + Result Post(const std::string &path, const char *body, size_t content_length, + const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, const char *body, + size_t content_length, const std::string &content_type); + Result Post(const std::string &path, const std::string &body, + const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type); + Result Post(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type); + Result Post(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, + size_t content_length, ContentProvider content_provider, + const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Post(const std::string &path, const Params ¶ms); + Result Post(const std::string &path, const Headers &headers, + const Params ¶ms); + Result Post(const std::string &path, const MultipartFormDataItems &items); + Result Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items); + Result Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, const std::string &boundary); + Result Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items); + + Result Put(const std::string &path); + Result Put(const std::string &path, const char *body, size_t content_length, + const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, const char *body, + size_t content_length, const std::string &content_type); + Result Put(const std::string &path, const std::string &body, + const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type); + Result Put(const std::string &path, size_t content_length, + ContentProvider content_provider, const std::string &content_type); + Result Put(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, + size_t content_length, ContentProvider content_provider, + const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Put(const std::string &path, const Params ¶ms); + Result Put(const std::string &path, const Headers &headers, + const Params ¶ms); + Result Put(const std::string &path, const MultipartFormDataItems &items); + Result Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items); + Result Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, const std::string &boundary); + Result Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items); + + Result Patch(const std::string &path); + Result Patch(const std::string &path, const char *body, size_t content_length, + const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type); + Result Patch(const std::string &path, const std::string &body, + const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type); + Result Patch(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type); + Result Patch(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + size_t content_length, ContentProvider content_provider, + const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + + Result Delete(const std::string &path); + Result Delete(const std::string &path, const Headers &headers); + Result Delete(const std::string &path, const char *body, + size_t content_length, const std::string &content_type); + Result Delete(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type); + Result Delete(const std::string &path, const std::string &body, + const std::string &content_type); + Result Delete(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type); + + Result Options(const std::string &path); + Result Options(const std::string &path, const Headers &headers); + + bool send(Request &req, Response &res, Error &error); + Result send(const Request &req); + + size_t is_socket_open() const; + + socket_t socket() const; + + void stop(); + + void set_hostname_addr_map(std::map addr_map); + + void set_default_headers(Headers headers); + + void set_address_family(int family); + void set_tcp_nodelay(bool on); + void set_socket_options(SocketOptions socket_options); + + void set_connection_timeout(time_t sec, time_t usec = 0); + template + void + set_connection_timeout(const std::chrono::duration &duration); + + void set_read_timeout(time_t sec, time_t usec = 0); + template + void set_read_timeout(const std::chrono::duration &duration); + + void set_write_timeout(time_t sec, time_t usec = 0); + template + void set_write_timeout(const std::chrono::duration &duration); + + void set_basic_auth(const std::string &username, const std::string &password); + void set_bearer_token_auth(const std::string &token); +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void set_digest_auth(const std::string &username, + const std::string &password); +#endif + + void set_keep_alive(bool on); + void set_follow_location(bool on); + + void set_url_encode(bool on); + + void set_compress(bool on); + + void set_decompress(bool on); + + void set_interface(const std::string &intf); + + void set_proxy(const std::string &host, int port); + void set_proxy_basic_auth(const std::string &username, + const std::string &password); + void set_proxy_bearer_token_auth(const std::string &token); +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void set_proxy_digest_auth(const std::string &username, + const std::string &password); +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void set_ca_cert_path(const std::string &ca_cert_file_path, + const std::string &ca_cert_dir_path = std::string()); + void set_ca_cert_store(X509_STORE *ca_cert_store); +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void enable_server_certificate_verification(bool enabled); +#endif + + void set_logger(Logger logger); + +protected: + struct Socket { + socket_t sock = INVALID_SOCKET; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + SSL *ssl = nullptr; +#endif + + bool is_open() const { return sock != INVALID_SOCKET; } + }; + + virtual bool create_and_connect_socket(Socket &socket, Error &error); + + // All of: + // shutdown_ssl + // shutdown_socket + // close_socket + // should ONLY be called when socket_mutex_ is locked. + // Also, shutdown_ssl and close_socket should also NOT be called concurrently + // with a DIFFERENT thread sending requests using that socket. + virtual void shutdown_ssl(Socket &socket, bool shutdown_gracefully); + void shutdown_socket(Socket &socket); + void close_socket(Socket &socket); + + bool process_request(Stream &strm, Request &req, Response &res, + bool close_connection, Error &error); + + bool write_content_with_provider(Stream &strm, const Request &req, + Error &error); + + void copy_settings(const ClientImpl &rhs); + + // Socket endpoint information + const std::string host_; + const int port_; + const std::string host_and_port_; + + // Current open socket + Socket socket_; + mutable std::mutex socket_mutex_; + std::recursive_mutex request_mutex_; + + // These are all protected under socket_mutex + size_t socket_requests_in_flight_ = 0; + std::thread::id socket_requests_are_from_thread_ = std::thread::id(); + bool socket_should_be_closed_when_request_is_done_ = false; + + // Hostname-IP map + std::map addr_map_; + + // Default headers + Headers default_headers_; + + // Settings + std::string client_cert_path_; + std::string client_key_path_; + + time_t connection_timeout_sec_ = CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND; + time_t connection_timeout_usec_ = CPPHTTPLIB_CONNECTION_TIMEOUT_USECOND; + time_t read_timeout_sec_ = CPPHTTPLIB_READ_TIMEOUT_SECOND; + time_t read_timeout_usec_ = CPPHTTPLIB_READ_TIMEOUT_USECOND; + time_t write_timeout_sec_ = CPPHTTPLIB_WRITE_TIMEOUT_SECOND; + time_t write_timeout_usec_ = CPPHTTPLIB_WRITE_TIMEOUT_USECOND; + + std::string basic_auth_username_; + std::string basic_auth_password_; + std::string bearer_token_auth_token_; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + std::string digest_auth_username_; + std::string digest_auth_password_; +#endif + + bool keep_alive_ = false; + bool follow_location_ = false; + + bool url_encode_ = true; + + int address_family_ = AF_UNSPEC; + bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY; + SocketOptions socket_options_ = nullptr; + + bool compress_ = false; + bool decompress_ = true; + + std::string interface_; + + std::string proxy_host_; + int proxy_port_ = -1; + + std::string proxy_basic_auth_username_; + std::string proxy_basic_auth_password_; + std::string proxy_bearer_token_auth_token_; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + std::string proxy_digest_auth_username_; + std::string proxy_digest_auth_password_; +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + std::string ca_cert_file_path_; + std::string ca_cert_dir_path_; + + X509_STORE *ca_cert_store_ = nullptr; +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + bool server_certificate_verification_ = true; +#endif + + Logger logger_; + +private: + bool send_(Request &req, Response &res, Error &error); + Result send_(Request &&req); + + socket_t create_client_socket(Error &error) const; + bool read_response_line(Stream &strm, const Request &req, Response &res); + bool write_request(Stream &strm, Request &req, bool close_connection, + Error &error); + bool redirect(Request &req, Response &res, Error &error); + bool handle_request(Stream &strm, Request &req, Response &res, + bool close_connection, Error &error); + std::unique_ptr send_with_content_provider( + Request &req, const char *body, size_t content_length, + ContentProvider content_provider, + ContentProviderWithoutLength content_provider_without_length, + const std::string &content_type, Error &error); + Result send_with_content_provider( + const std::string &method, const std::string &path, + const Headers &headers, const char *body, size_t content_length, + ContentProvider content_provider, + ContentProviderWithoutLength content_provider_without_length, + const std::string &content_type); + ContentProviderWithoutLength get_multipart_content_provider( + const std::string &boundary, const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items); + + std::string adjust_host_string(const std::string &host) const; + + virtual bool process_socket(const Socket &socket, + std::function callback); + virtual bool is_ssl() const; +}; + +class Client { +public: + // Universal interface + explicit Client(const std::string &scheme_host_port); + + explicit Client(const std::string &scheme_host_port, + const std::string &client_cert_path, + const std::string &client_key_path); + + // HTTP only interface + explicit Client(const std::string &host, int port); + + explicit Client(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path); + + Client(Client &&) = default; + + ~Client(); + + bool is_valid() const; + + Result Get(const std::string &path); + Result Get(const std::string &path, const Headers &headers); + Result Get(const std::string &path, Progress progress); + Result Get(const std::string &path, const Headers &headers, + Progress progress); + Result Get(const std::string &path, ContentReceiver content_receiver); + Result Get(const std::string &path, const Headers &headers, + ContentReceiver content_receiver); + Result Get(const std::string &path, ContentReceiver content_receiver, + Progress progress); + Result Get(const std::string &path, const Headers &headers, + ContentReceiver content_receiver, Progress progress); + Result Get(const std::string &path, ResponseHandler response_handler, + ContentReceiver content_receiver); + Result Get(const std::string &path, const Headers &headers, + ResponseHandler response_handler, + ContentReceiver content_receiver); + Result Get(const std::string &path, const Headers &headers, + ResponseHandler response_handler, ContentReceiver content_receiver, + Progress progress); + Result Get(const std::string &path, ResponseHandler response_handler, + ContentReceiver content_receiver, Progress progress); + + Result Get(const std::string &path, const Params ¶ms, + const Headers &headers, Progress progress = nullptr); + Result Get(const std::string &path, const Params ¶ms, + const Headers &headers, ContentReceiver content_receiver, + Progress progress = nullptr); + Result Get(const std::string &path, const Params ¶ms, + const Headers &headers, ResponseHandler response_handler, + ContentReceiver content_receiver, Progress progress = nullptr); + + Result Head(const std::string &path); + Result Head(const std::string &path, const Headers &headers); + + Result Post(const std::string &path); + Result Post(const std::string &path, const Headers &headers); + Result Post(const std::string &path, const char *body, size_t content_length, + const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, const char *body, + size_t content_length, const std::string &content_type); + Result Post(const std::string &path, const std::string &body, + const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type); + Result Post(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type); + Result Post(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, + size_t content_length, ContentProvider content_provider, + const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Post(const std::string &path, const Params ¶ms); + Result Post(const std::string &path, const Headers &headers, + const Params ¶ms); + Result Post(const std::string &path, const MultipartFormDataItems &items); + Result Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items); + Result Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, const std::string &boundary); + Result Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items); + + Result Put(const std::string &path); + Result Put(const std::string &path, const char *body, size_t content_length, + const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, const char *body, + size_t content_length, const std::string &content_type); + Result Put(const std::string &path, const std::string &body, + const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type); + Result Put(const std::string &path, size_t content_length, + ContentProvider content_provider, const std::string &content_type); + Result Put(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, + size_t content_length, ContentProvider content_provider, + const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Put(const std::string &path, const Params ¶ms); + Result Put(const std::string &path, const Headers &headers, + const Params ¶ms); + Result Put(const std::string &path, const MultipartFormDataItems &items); + Result Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items); + Result Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, const std::string &boundary); + Result Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items); + + Result Patch(const std::string &path); + Result Patch(const std::string &path, const char *body, size_t content_length, + const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type); + Result Patch(const std::string &path, const std::string &body, + const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type); + Result Patch(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type); + Result Patch(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + size_t content_length, ContentProvider content_provider, + const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type); + + Result Delete(const std::string &path); + Result Delete(const std::string &path, const Headers &headers); + Result Delete(const std::string &path, const char *body, + size_t content_length, const std::string &content_type); + Result Delete(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type); + Result Delete(const std::string &path, const std::string &body, + const std::string &content_type); + Result Delete(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type); + + Result Options(const std::string &path); + Result Options(const std::string &path, const Headers &headers); + + bool send(Request &req, Response &res, Error &error); + Result send(const Request &req); + + size_t is_socket_open() const; + + socket_t socket() const; + + void stop(); + + void set_hostname_addr_map(std::map addr_map); + + void set_default_headers(Headers headers); + + void set_address_family(int family); + void set_tcp_nodelay(bool on); + void set_socket_options(SocketOptions socket_options); + + void set_connection_timeout(time_t sec, time_t usec = 0); + template + void + set_connection_timeout(const std::chrono::duration &duration); + + void set_read_timeout(time_t sec, time_t usec = 0); + template + void set_read_timeout(const std::chrono::duration &duration); + + void set_write_timeout(time_t sec, time_t usec = 0); + template + void set_write_timeout(const std::chrono::duration &duration); + + void set_basic_auth(const std::string &username, const std::string &password); + void set_bearer_token_auth(const std::string &token); +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void set_digest_auth(const std::string &username, + const std::string &password); +#endif + + void set_keep_alive(bool on); + void set_follow_location(bool on); + + void set_url_encode(bool on); + + void set_compress(bool on); + + void set_decompress(bool on); + + void set_interface(const std::string &intf); + + void set_proxy(const std::string &host, int port); + void set_proxy_basic_auth(const std::string &username, + const std::string &password); + void set_proxy_bearer_token_auth(const std::string &token); +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void set_proxy_digest_auth(const std::string &username, + const std::string &password); +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void enable_server_certificate_verification(bool enabled); +#endif + + void set_logger(Logger logger); + + // SSL +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + void set_ca_cert_path(const std::string &ca_cert_file_path, + const std::string &ca_cert_dir_path = std::string()); + + void set_ca_cert_store(X509_STORE *ca_cert_store); + + long get_openssl_verify_result() const; + + SSL_CTX *ssl_context() const; +#endif + +private: + std::unique_ptr cli_; + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + bool is_ssl_ = false; +#endif +}; + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +class SSLServer : public Server { +public: + SSLServer(const char *cert_path, const char *private_key_path, + const char *client_ca_cert_file_path = nullptr, + const char *client_ca_cert_dir_path = nullptr, + const char *private_key_password = nullptr); + + SSLServer(X509 *cert, EVP_PKEY *private_key, + X509_STORE *client_ca_cert_store = nullptr); + + SSLServer( + const std::function &setup_ssl_ctx_callback); + + ~SSLServer() override; + + bool is_valid() const override; + + SSL_CTX *ssl_context() const; + +private: + bool process_and_close_socket(socket_t sock) override; + + SSL_CTX *ctx_; + std::mutex ctx_mutex_; +}; + +class SSLClient : public ClientImpl { +public: + explicit SSLClient(const std::string &host); + + explicit SSLClient(const std::string &host, int port); + + explicit SSLClient(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path); + + explicit SSLClient(const std::string &host, int port, X509 *client_cert, + EVP_PKEY *client_key); + + ~SSLClient() override; + + bool is_valid() const override; + + void set_ca_cert_store(X509_STORE *ca_cert_store); + + long get_openssl_verify_result() const; + + SSL_CTX *ssl_context() const; + +private: + bool create_and_connect_socket(Socket &socket, Error &error) override; + void shutdown_ssl(Socket &socket, bool shutdown_gracefully) override; + void shutdown_ssl_impl(Socket &socket, bool shutdown_socket); + + bool process_socket(const Socket &socket, + std::function callback) override; + bool is_ssl() const override; + + bool connect_with_proxy(Socket &sock, Response &res, bool &success, + Error &error); + bool initialize_ssl(Socket &socket, Error &error); + + bool load_certs(); + + bool verify_host(X509 *server_cert) const; + bool verify_host_with_subject_alt_name(X509 *server_cert) const; + bool verify_host_with_common_name(X509 *server_cert) const; + bool check_host_name(const char *pattern, size_t pattern_len) const; + + SSL_CTX *ctx_; + std::mutex ctx_mutex_; + std::once_flag initialize_cert_; + + std::vector host_components_; + + long verify_result_ = 0; + + friend class ClientImpl; +}; +#endif + +/* + * Implementation of template methods. + */ + +namespace detail { + +template +inline void duration_to_sec_and_usec(const T &duration, U callback) { + auto sec = std::chrono::duration_cast(duration).count(); + auto usec = std::chrono::duration_cast( + duration - std::chrono::seconds(sec)) + .count(); + callback(static_cast(sec), static_cast(usec)); +} + +template +inline T get_header_value(const Headers & /*headers*/, + const std::string & /*key*/, size_t /*id*/ = 0, + uint64_t /*def*/ = 0) {} + +template <> +inline uint64_t get_header_value(const Headers &headers, + const std::string &key, size_t id, + uint64_t def) { + auto rng = headers.equal_range(key); + auto it = rng.first; + std::advance(it, static_cast(id)); + if (it != rng.second) { + return std::strtoull(it->second.data(), nullptr, 10); + } + return def; +} + +} // namespace detail + +template +inline T Request::get_header_value(const std::string &key, size_t id) const { + return detail::get_header_value(headers, key, id, 0); +} + +template +inline T Response::get_header_value(const std::string &key, size_t id) const { + return detail::get_header_value(headers, key, id, 0); +} + +template +inline ssize_t Stream::write_format(const char *fmt, const Args &...args) { + const auto bufsiz = 2048; + std::array buf{}; + + auto sn = snprintf(buf.data(), buf.size() - 1, fmt, args...); + if (sn <= 0) { return sn; } + + auto n = static_cast(sn); + + if (n >= buf.size() - 1) { + std::vector glowable_buf(buf.size()); + + while (n >= glowable_buf.size() - 1) { + glowable_buf.resize(glowable_buf.size() * 2); + n = static_cast( + snprintf(&glowable_buf[0], glowable_buf.size() - 1, fmt, args...)); + } + return write(&glowable_buf[0], n); + } else { + return write(buf.data(), n); + } +} + +inline void default_socket_options(socket_t sock) { + int yes = 1; +#ifdef _WIN32 + setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, reinterpret_cast(&yes), + sizeof(yes)); + setsockopt(sock, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, + reinterpret_cast(&yes), sizeof(yes)); +#else +#ifdef SO_REUSEPORT + setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, reinterpret_cast(&yes), + sizeof(yes)); +#else + setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, reinterpret_cast(&yes), + sizeof(yes)); +#endif +#endif +} + +template +inline Server & +Server::set_read_timeout(const std::chrono::duration &duration) { + detail::duration_to_sec_and_usec( + duration, [&](time_t sec, time_t usec) { set_read_timeout(sec, usec); }); + return *this; +} + +template +inline Server & +Server::set_write_timeout(const std::chrono::duration &duration) { + detail::duration_to_sec_and_usec( + duration, [&](time_t sec, time_t usec) { set_write_timeout(sec, usec); }); + return *this; +} + +template +inline Server & +Server::set_idle_interval(const std::chrono::duration &duration) { + detail::duration_to_sec_and_usec( + duration, [&](time_t sec, time_t usec) { set_idle_interval(sec, usec); }); + return *this; +} + +inline std::string to_string(const Error error) { + switch (error) { + case Error::Success: return "Success (no error)"; + case Error::Connection: return "Could not establish connection"; + case Error::BindIPAddress: return "Failed to bind IP address"; + case Error::Read: return "Failed to read connection"; + case Error::Write: return "Failed to write connection"; + case Error::ExceedRedirectCount: return "Maximum redirect count exceeded"; + case Error::Canceled: return "Connection handling canceled"; + case Error::SSLConnection: return "SSL connection failed"; + case Error::SSLLoadingCerts: return "SSL certificate loading failed"; + case Error::SSLServerVerification: return "SSL server verification failed"; + case Error::UnsupportedMultipartBoundaryChars: + return "Unsupported HTTP multipart boundary characters"; + case Error::Compression: return "Compression failed"; + case Error::ConnectionTimeout: return "Connection timed out"; + case Error::Unknown: return "Unknown"; + default: break; + } + + return "Invalid"; +} + +inline std::ostream &operator<<(std::ostream &os, const Error &obj) { + os << to_string(obj); + os << " (" << static_cast::type>(obj) << ')'; + return os; +} + +template +inline T Result::get_request_header_value(const std::string &key, + size_t id) const { + return detail::get_header_value(request_headers_, key, id, 0); +} + +template +inline void ClientImpl::set_connection_timeout( + const std::chrono::duration &duration) { + detail::duration_to_sec_and_usec(duration, [&](time_t sec, time_t usec) { + set_connection_timeout(sec, usec); + }); +} + +template +inline void ClientImpl::set_read_timeout( + const std::chrono::duration &duration) { + detail::duration_to_sec_and_usec( + duration, [&](time_t sec, time_t usec) { set_read_timeout(sec, usec); }); +} + +template +inline void ClientImpl::set_write_timeout( + const std::chrono::duration &duration) { + detail::duration_to_sec_and_usec( + duration, [&](time_t sec, time_t usec) { set_write_timeout(sec, usec); }); +} + +template +inline void Client::set_connection_timeout( + const std::chrono::duration &duration) { + cli_->set_connection_timeout(duration); +} + +template +inline void +Client::set_read_timeout(const std::chrono::duration &duration) { + cli_->set_read_timeout(duration); +} + +template +inline void +Client::set_write_timeout(const std::chrono::duration &duration) { + cli_->set_write_timeout(duration); +} + +/* + * Forward declarations and types that will be part of the .h file if split into + * .h + .cc. + */ + +std::string hosted_at(const std::string &hostname); + +void hosted_at(const std::string &hostname, std::vector &addrs); + +std::string append_query_params(const std::string &path, const Params ¶ms); + +std::pair make_range_header(Ranges ranges); + +std::pair +make_basic_authentication_header(const std::string &username, + const std::string &password, + bool is_proxy = false); + +namespace detail { + +std::string encode_query_param(const std::string &value); + +std::string decode_url(const std::string &s, bool convert_plus_to_space); + +void read_file(const std::string &path, std::string &out); + +std::string trim_copy(const std::string &s); + +void split(const char *b, const char *e, char d, + std::function fn); + +bool process_client_socket(socket_t sock, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, + std::function callback); + +socket_t create_client_socket( + const std::string &host, const std::string &ip, int port, + int address_family, bool tcp_nodelay, SocketOptions socket_options, + time_t connection_timeout_sec, time_t connection_timeout_usec, + time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, const std::string &intf, Error &error); + +const char *get_header_value(const Headers &headers, const std::string &key, + size_t id = 0, const char *def = nullptr); + +std::string params_to_query_str(const Params ¶ms); + +void parse_query_text(const std::string &s, Params ¶ms); + +bool parse_multipart_boundary(const std::string &content_type, + std::string &boundary); + +bool parse_range_header(const std::string &s, Ranges &ranges); + +int close_socket(socket_t sock); + +ssize_t send_socket(socket_t sock, const void *ptr, size_t size, int flags); + +ssize_t read_socket(socket_t sock, void *ptr, size_t size, int flags); + +enum class EncodingType { None = 0, Gzip, Brotli }; + +EncodingType encoding_type(const Request &req, const Response &res); + +class BufferStream : public Stream { +public: + BufferStream() = default; + ~BufferStream() override = default; + + bool is_readable() const override; + bool is_writable() const override; + ssize_t read(char *ptr, size_t size) override; + ssize_t write(const char *ptr, size_t size) override; + void get_remote_ip_and_port(std::string &ip, int &port) const override; + void get_local_ip_and_port(std::string &ip, int &port) const override; + socket_t socket() const override; + + const std::string &get_buffer() const; + +private: + std::string buffer; + size_t position = 0; +}; + +class compressor { +public: + virtual ~compressor() = default; + + typedef std::function Callback; + virtual bool compress(const char *data, size_t data_length, bool last, + Callback callback) = 0; +}; + +class decompressor { +public: + virtual ~decompressor() = default; + + virtual bool is_valid() const = 0; + + typedef std::function Callback; + virtual bool decompress(const char *data, size_t data_length, + Callback callback) = 0; +}; + +class nocompressor : public compressor { +public: + virtual ~nocompressor() = default; + + bool compress(const char *data, size_t data_length, bool /*last*/, + Callback callback) override; +}; + +#ifdef CPPHTTPLIB_ZLIB_SUPPORT +class gzip_compressor : public compressor { +public: + gzip_compressor(); + ~gzip_compressor(); + + bool compress(const char *data, size_t data_length, bool last, + Callback callback) override; + +private: + bool is_valid_ = false; + z_stream strm_; +}; + +class gzip_decompressor : public decompressor { +public: + gzip_decompressor(); + ~gzip_decompressor(); + + bool is_valid() const override; + + bool decompress(const char *data, size_t data_length, + Callback callback) override; + +private: + bool is_valid_ = false; + z_stream strm_; +}; +#endif + +#ifdef CPPHTTPLIB_BROTLI_SUPPORT +class brotli_compressor : public compressor { +public: + brotli_compressor(); + ~brotli_compressor(); + + bool compress(const char *data, size_t data_length, bool last, + Callback callback) override; + +private: + BrotliEncoderState *state_ = nullptr; +}; + +class brotli_decompressor : public decompressor { +public: + brotli_decompressor(); + ~brotli_decompressor(); + + bool is_valid() const override; + + bool decompress(const char *data, size_t data_length, + Callback callback) override; + +private: + BrotliDecoderResult decoder_r; + BrotliDecoderState *decoder_s = nullptr; +}; +#endif + +// NOTE: until the read size reaches `fixed_buffer_size`, use `fixed_buffer` +// to store data. The call can set memory on stack for performance. +class stream_line_reader { +public: + stream_line_reader(Stream &strm, char *fixed_buffer, + size_t fixed_buffer_size); + const char *ptr() const; + size_t size() const; + bool end_with_crlf() const; + bool getline(); + +private: + void append(char c); + + Stream &strm_; + char *fixed_buffer_; + const size_t fixed_buffer_size_; + size_t fixed_buffer_used_size_ = 0; + std::string glowable_buffer_; +}; + +} // namespace detail + +// ---------------------------------------------------------------------------- + +/* + * Implementation that will be part of the .cc file if split into .h + .cc. + */ + +namespace detail { + +inline bool is_hex(char c, int &v) { + if (0x20 <= c && isdigit(c)) { + v = c - '0'; + return true; + } else if ('A' <= c && c <= 'F') { + v = c - 'A' + 10; + return true; + } else if ('a' <= c && c <= 'f') { + v = c - 'a' + 10; + return true; + } + return false; +} + +inline bool from_hex_to_i(const std::string &s, size_t i, size_t cnt, + int &val) { + if (i >= s.size()) { return false; } + + val = 0; + for (; cnt; i++, cnt--) { + if (!s[i]) { return false; } + int v = 0; + if (is_hex(s[i], v)) { + val = val * 16 + v; + } else { + return false; + } + } + return true; +} + +inline std::string from_i_to_hex(size_t n) { + const char *charset = "0123456789abcdef"; + std::string ret; + do { + ret = charset[n & 15] + ret; + n >>= 4; + } while (n > 0); + return ret; +} + +inline size_t to_utf8(int code, char *buff) { + if (code < 0x0080) { + buff[0] = (code & 0x7F); + return 1; + } else if (code < 0x0800) { + buff[0] = static_cast(0xC0 | ((code >> 6) & 0x1F)); + buff[1] = static_cast(0x80 | (code & 0x3F)); + return 2; + } else if (code < 0xD800) { + buff[0] = static_cast(0xE0 | ((code >> 12) & 0xF)); + buff[1] = static_cast(0x80 | ((code >> 6) & 0x3F)); + buff[2] = static_cast(0x80 | (code & 0x3F)); + return 3; + } else if (code < 0xE000) { // D800 - DFFF is invalid... + return 0; + } else if (code < 0x10000) { + buff[0] = static_cast(0xE0 | ((code >> 12) & 0xF)); + buff[1] = static_cast(0x80 | ((code >> 6) & 0x3F)); + buff[2] = static_cast(0x80 | (code & 0x3F)); + return 3; + } else if (code < 0x110000) { + buff[0] = static_cast(0xF0 | ((code >> 18) & 0x7)); + buff[1] = static_cast(0x80 | ((code >> 12) & 0x3F)); + buff[2] = static_cast(0x80 | ((code >> 6) & 0x3F)); + buff[3] = static_cast(0x80 | (code & 0x3F)); + return 4; + } + + // NOTREACHED + return 0; +} + +// NOTE: This code came up with the following stackoverflow post: +// https://stackoverflow.com/questions/180947/base64-decode-snippet-in-c +inline std::string base64_encode(const std::string &in) { + static const auto lookup = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + std::string out; + out.reserve(in.size()); + + int val = 0; + int valb = -6; + + for (auto c : in) { + val = (val << 8) + static_cast(c); + valb += 8; + while (valb >= 0) { + out.push_back(lookup[(val >> valb) & 0x3F]); + valb -= 6; + } + } + + if (valb > -6) { out.push_back(lookup[((val << 8) >> (valb + 8)) & 0x3F]); } + + while (out.size() % 4) { + out.push_back('='); + } + + return out; +} + +inline bool is_file(const std::string &path) { +#ifdef _WIN32 + return _access_s(path.c_str(), 0) == 0; +#else + struct stat st; + return stat(path.c_str(), &st) >= 0 && S_ISREG(st.st_mode); +#endif +} + +inline bool is_dir(const std::string &path) { + struct stat st; + return stat(path.c_str(), &st) >= 0 && S_ISDIR(st.st_mode); +} + +inline bool is_valid_path(const std::string &path) { + size_t level = 0; + size_t i = 0; + + // Skip slash + while (i < path.size() && path[i] == '/') { + i++; + } + + while (i < path.size()) { + // Read component + auto beg = i; + while (i < path.size() && path[i] != '/') { + i++; + } + + auto len = i - beg; + assert(len > 0); + + if (!path.compare(beg, len, ".")) { + ; + } else if (!path.compare(beg, len, "..")) { + if (level == 0) { return false; } + level--; + } else { + level++; + } + + // Skip slash + while (i < path.size() && path[i] == '/') { + i++; + } + } + + return true; +} + +inline std::string encode_query_param(const std::string &value) { + std::ostringstream escaped; + escaped.fill('0'); + escaped << std::hex; + + for (auto c : value) { + if (std::isalnum(static_cast(c)) || c == '-' || c == '_' || + c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || + c == ')') { + escaped << c; + } else { + escaped << std::uppercase; + escaped << '%' << std::setw(2) + << static_cast(static_cast(c)); + escaped << std::nouppercase; + } + } + + return escaped.str(); +} + +inline std::string encode_url(const std::string &s) { + std::string result; + result.reserve(s.size()); + + for (size_t i = 0; s[i]; i++) { + switch (s[i]) { + case ' ': result += "%20"; break; + case '+': result += "%2B"; break; + case '\r': result += "%0D"; break; + case '\n': result += "%0A"; break; + case '\'': result += "%27"; break; + case ',': result += "%2C"; break; + // case ':': result += "%3A"; break; // ok? probably... + case ';': result += "%3B"; break; + default: + auto c = static_cast(s[i]); + if (c >= 0x80) { + result += '%'; + char hex[4]; + auto len = snprintf(hex, sizeof(hex) - 1, "%02X", c); + assert(len == 2); + result.append(hex, static_cast(len)); + } else { + result += s[i]; + } + break; + } + } + + return result; +} + +inline std::string decode_url(const std::string &s, + bool convert_plus_to_space) { + std::string result; + + for (size_t i = 0; i < s.size(); i++) { + if (s[i] == '%' && i + 1 < s.size()) { + if (s[i + 1] == 'u') { + int val = 0; + if (from_hex_to_i(s, i + 2, 4, val)) { + // 4 digits Unicode codes + char buff[4]; + size_t len = to_utf8(val, buff); + if (len > 0) { result.append(buff, len); } + i += 5; // 'u0000' + } else { + result += s[i]; + } + } else { + int val = 0; + if (from_hex_to_i(s, i + 1, 2, val)) { + // 2 digits hex codes + result += static_cast(val); + i += 2; // '00' + } else { + result += s[i]; + } + } + } else if (convert_plus_to_space && s[i] == '+') { + result += ' '; + } else { + result += s[i]; + } + } + + return result; +} + +inline void read_file(const std::string &path, std::string &out) { + std::ifstream fs(path, std::ios_base::binary); + fs.seekg(0, std::ios_base::end); + auto size = fs.tellg(); + fs.seekg(0); + out.resize(static_cast(size)); + fs.read(&out[0], static_cast(size)); +} + +inline std::string file_extension(const std::string &path) { + std::smatch m; + static auto re = std::regex("\\.([a-zA-Z0-9]+)$"); + if (std::regex_search(path, m, re)) { return m[1].str(); } + return std::string(); +} + +inline bool is_space_or_tab(char c) { return c == ' ' || c == '\t'; } + +inline std::pair trim(const char *b, const char *e, size_t left, + size_t right) { + while (b + left < e && is_space_or_tab(b[left])) { + left++; + } + while (right > 0 && is_space_or_tab(b[right - 1])) { + right--; + } + return std::make_pair(left, right); +} + +inline std::string trim_copy(const std::string &s) { + auto r = trim(s.data(), s.data() + s.size(), 0, s.size()); + return s.substr(r.first, r.second - r.first); +} + +inline void split(const char *b, const char *e, char d, + std::function fn) { + size_t i = 0; + size_t beg = 0; + + while (e ? (b + i < e) : (b[i] != '\0')) { + if (b[i] == d) { + auto r = trim(b, e, beg, i); + if (r.first < r.second) { fn(&b[r.first], &b[r.second]); } + beg = i + 1; + } + i++; + } + + if (i) { + auto r = trim(b, e, beg, i); + if (r.first < r.second) { fn(&b[r.first], &b[r.second]); } + } +} + +inline stream_line_reader::stream_line_reader(Stream &strm, char *fixed_buffer, + size_t fixed_buffer_size) + : strm_(strm), fixed_buffer_(fixed_buffer), + fixed_buffer_size_(fixed_buffer_size) {} + +inline const char *stream_line_reader::ptr() const { + if (glowable_buffer_.empty()) { + return fixed_buffer_; + } else { + return glowable_buffer_.data(); + } +} + +inline size_t stream_line_reader::size() const { + if (glowable_buffer_.empty()) { + return fixed_buffer_used_size_; + } else { + return glowable_buffer_.size(); + } +} + +inline bool stream_line_reader::end_with_crlf() const { + auto end = ptr() + size(); + return size() >= 2 && end[-2] == '\r' && end[-1] == '\n'; +} + +inline bool stream_line_reader::getline() { + fixed_buffer_used_size_ = 0; + glowable_buffer_.clear(); + + for (size_t i = 0;; i++) { + char byte; + auto n = strm_.read(&byte, 1); + + if (n < 0) { + return false; + } else if (n == 0) { + if (i == 0) { + return false; + } else { + break; + } + } + + append(byte); + + if (byte == '\n') { break; } + } + + return true; +} + +inline void stream_line_reader::append(char c) { + if (fixed_buffer_used_size_ < fixed_buffer_size_ - 1) { + fixed_buffer_[fixed_buffer_used_size_++] = c; + fixed_buffer_[fixed_buffer_used_size_] = '\0'; + } else { + if (glowable_buffer_.empty()) { + assert(fixed_buffer_[fixed_buffer_used_size_] == '\0'); + glowable_buffer_.assign(fixed_buffer_, fixed_buffer_used_size_); + } + glowable_buffer_ += c; + } +} + +inline int close_socket(socket_t sock) { +#ifdef _WIN32 + return closesocket(sock); +#else + return close(sock); +#endif +} + +template inline ssize_t handle_EINTR(T fn) { + ssize_t res = false; + while (true) { + res = fn(); + if (res < 0 && errno == EINTR) { continue; } + break; + } + return res; +} + +inline ssize_t read_socket(socket_t sock, void *ptr, size_t size, int flags) { + return handle_EINTR([&]() { + return recv(sock, +#ifdef _WIN32 + static_cast(ptr), static_cast(size), +#else + ptr, size, +#endif + flags); + }); +} + +inline ssize_t send_socket(socket_t sock, const void *ptr, size_t size, + int flags) { + return handle_EINTR([&]() { + return send(sock, +#ifdef _WIN32 + static_cast(ptr), static_cast(size), +#else + ptr, size, +#endif + flags); + }); +} + +inline ssize_t select_read(socket_t sock, time_t sec, time_t usec) { +#ifdef CPPHTTPLIB_USE_POLL + struct pollfd pfd_read; + pfd_read.fd = sock; + pfd_read.events = POLLIN; + + auto timeout = static_cast(sec * 1000 + usec / 1000); + + return handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); }); +#else +#ifndef _WIN32 + if (sock >= FD_SETSIZE) { return 1; } +#endif + + fd_set fds; + FD_ZERO(&fds); + FD_SET(sock, &fds); + + timeval tv; + tv.tv_sec = static_cast(sec); + tv.tv_usec = static_cast(usec); + + return handle_EINTR([&]() { + return select(static_cast(sock + 1), &fds, nullptr, nullptr, &tv); + }); +#endif +} + +inline ssize_t select_write(socket_t sock, time_t sec, time_t usec) { +#ifdef CPPHTTPLIB_USE_POLL + struct pollfd pfd_read; + pfd_read.fd = sock; + pfd_read.events = POLLOUT; + + auto timeout = static_cast(sec * 1000 + usec / 1000); + + return handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); }); +#else +#ifndef _WIN32 + if (sock >= FD_SETSIZE) { return 1; } +#endif + + fd_set fds; + FD_ZERO(&fds); + FD_SET(sock, &fds); + + timeval tv; + tv.tv_sec = static_cast(sec); + tv.tv_usec = static_cast(usec); + + return handle_EINTR([&]() { + return select(static_cast(sock + 1), nullptr, &fds, nullptr, &tv); + }); +#endif +} + +inline Error wait_until_socket_is_ready(socket_t sock, time_t sec, + time_t usec) { +#ifdef CPPHTTPLIB_USE_POLL + struct pollfd pfd_read; + pfd_read.fd = sock; + pfd_read.events = POLLIN | POLLOUT; + + auto timeout = static_cast(sec * 1000 + usec / 1000); + + auto poll_res = handle_EINTR([&]() { return poll(&pfd_read, 1, timeout); }); + + if (poll_res == 0) { return Error::ConnectionTimeout; } + + if (poll_res > 0 && pfd_read.revents & (POLLIN | POLLOUT)) { + int error = 0; + socklen_t len = sizeof(error); + auto res = getsockopt(sock, SOL_SOCKET, SO_ERROR, + reinterpret_cast(&error), &len); + auto successful = res >= 0 && !error; + return successful ? Error::Success : Error::Connection; + } + + return Error::Connection; +#else +#ifndef _WIN32 + if (sock >= FD_SETSIZE) { return Error::Connection; } +#endif + + fd_set fdsr; + FD_ZERO(&fdsr); + FD_SET(sock, &fdsr); + + auto fdsw = fdsr; + auto fdse = fdsr; + + timeval tv; + tv.tv_sec = static_cast(sec); + tv.tv_usec = static_cast(usec); + + auto ret = handle_EINTR([&]() { + return select(static_cast(sock + 1), &fdsr, &fdsw, &fdse, &tv); + }); + + if (ret == 0) { return Error::ConnectionTimeout; } + + if (ret > 0 && (FD_ISSET(sock, &fdsr) || FD_ISSET(sock, &fdsw))) { + int error = 0; + socklen_t len = sizeof(error); + auto res = getsockopt(sock, SOL_SOCKET, SO_ERROR, + reinterpret_cast(&error), &len); + auto successful = res >= 0 && !error; + return successful ? Error::Success : Error::Connection; + } + return Error::Connection; +#endif +} + +inline bool is_socket_alive(socket_t sock) { + const auto val = detail::select_read(sock, 0, 0); + if (val == 0) { + return true; + } else if (val < 0 && errno == EBADF) { + return false; + } + char buf[1]; + return detail::read_socket(sock, &buf[0], sizeof(buf), MSG_PEEK) > 0; +} + +class SocketStream : public Stream { +public: + SocketStream(socket_t sock, time_t read_timeout_sec, time_t read_timeout_usec, + time_t write_timeout_sec, time_t write_timeout_usec); + ~SocketStream() override; + + bool is_readable() const override; + bool is_writable() const override; + ssize_t read(char *ptr, size_t size) override; + ssize_t write(const char *ptr, size_t size) override; + void get_remote_ip_and_port(std::string &ip, int &port) const override; + void get_local_ip_and_port(std::string &ip, int &port) const override; + socket_t socket() const override; + +private: + socket_t sock_; + time_t read_timeout_sec_; + time_t read_timeout_usec_; + time_t write_timeout_sec_; + time_t write_timeout_usec_; + + std::vector read_buff_; + size_t read_buff_off_ = 0; + size_t read_buff_content_size_ = 0; + + static const size_t read_buff_size_ = 1024 * 4; +}; + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +class SSLSocketStream : public Stream { +public: + SSLSocketStream(socket_t sock, SSL *ssl, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec); + ~SSLSocketStream() override; + + bool is_readable() const override; + bool is_writable() const override; + ssize_t read(char *ptr, size_t size) override; + ssize_t write(const char *ptr, size_t size) override; + void get_remote_ip_and_port(std::string &ip, int &port) const override; + void get_local_ip_and_port(std::string &ip, int &port) const override; + socket_t socket() const override; + +private: + socket_t sock_; + SSL *ssl_; + time_t read_timeout_sec_; + time_t read_timeout_usec_; + time_t write_timeout_sec_; + time_t write_timeout_usec_; +}; +#endif + +inline bool keep_alive(socket_t sock, time_t keep_alive_timeout_sec) { + using namespace std::chrono; + auto start = steady_clock::now(); + while (true) { + auto val = select_read(sock, 0, 10000); + if (val < 0) { + return false; + } else if (val == 0) { + auto current = steady_clock::now(); + auto duration = duration_cast(current - start); + auto timeout = keep_alive_timeout_sec * 1000; + if (duration.count() > timeout) { return false; } + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } else { + return true; + } + } +} + +template +inline bool +process_server_socket_core(const std::atomic &svr_sock, socket_t sock, + size_t keep_alive_max_count, + time_t keep_alive_timeout_sec, T callback) { + assert(keep_alive_max_count > 0); + auto ret = false; + auto count = keep_alive_max_count; + while (svr_sock != INVALID_SOCKET && count > 0 && + keep_alive(sock, keep_alive_timeout_sec)) { + auto close_connection = count == 1; + auto connection_closed = false; + ret = callback(close_connection, connection_closed); + if (!ret || connection_closed) { break; } + count--; + } + return ret; +} + +template +inline bool +process_server_socket(const std::atomic &svr_sock, socket_t sock, + size_t keep_alive_max_count, + time_t keep_alive_timeout_sec, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, T callback) { + return process_server_socket_core( + svr_sock, sock, keep_alive_max_count, keep_alive_timeout_sec, + [&](bool close_connection, bool &connection_closed) { + SocketStream strm(sock, read_timeout_sec, read_timeout_usec, + write_timeout_sec, write_timeout_usec); + return callback(strm, close_connection, connection_closed); + }); +} + +inline bool process_client_socket(socket_t sock, time_t read_timeout_sec, + time_t read_timeout_usec, + time_t write_timeout_sec, + time_t write_timeout_usec, + std::function callback) { + SocketStream strm(sock, read_timeout_sec, read_timeout_usec, + write_timeout_sec, write_timeout_usec); + return callback(strm); +} + +inline int shutdown_socket(socket_t sock) { +#ifdef _WIN32 + return shutdown(sock, SD_BOTH); +#else + return shutdown(sock, SHUT_RDWR); +#endif +} + +template +socket_t create_socket(const std::string &host, const std::string &ip, int port, + int address_family, int socket_flags, bool tcp_nodelay, + SocketOptions socket_options, + BindOrConnect bind_or_connect) { + // Get address info + const char *node = nullptr; + struct addrinfo hints; + struct addrinfo *result; + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + + if (!ip.empty()) { + node = ip.c_str(); + // Ask getaddrinfo to convert IP in c-string to address + hints.ai_family = AF_UNSPEC; + hints.ai_flags = AI_NUMERICHOST; + } else { + if (!host.empty()) { node = host.c_str(); } + hints.ai_family = address_family; + hints.ai_flags = socket_flags; + } + +#ifndef _WIN32 + if (hints.ai_family == AF_UNIX) { + const auto addrlen = host.length(); + if (addrlen > sizeof(sockaddr_un::sun_path)) return INVALID_SOCKET; + + auto sock = socket(hints.ai_family, hints.ai_socktype, hints.ai_protocol); + if (sock != INVALID_SOCKET) { + sockaddr_un addr{}; + addr.sun_family = AF_UNIX; + std::copy(host.begin(), host.end(), addr.sun_path); + + hints.ai_addr = reinterpret_cast(&addr); + hints.ai_addrlen = static_cast( + sizeof(addr) - sizeof(addr.sun_path) + addrlen); + + fcntl(sock, F_SETFD, FD_CLOEXEC); + if (socket_options) { socket_options(sock); } + + if (!bind_or_connect(sock, hints)) { + close_socket(sock); + sock = INVALID_SOCKET; + } + } + return sock; + } +#endif + + auto service = std::to_string(port); + + if (getaddrinfo(node, service.c_str(), &hints, &result)) { +#if defined __linux__ && !defined __ANDROID__ + res_init(); +#endif + return INVALID_SOCKET; + } + + for (auto rp = result; rp; rp = rp->ai_next) { + // Create a socket +#ifdef _WIN32 + auto sock = + WSASocketW(rp->ai_family, rp->ai_socktype, rp->ai_protocol, nullptr, 0, + WSA_FLAG_NO_HANDLE_INHERIT | WSA_FLAG_OVERLAPPED); + /** + * Since the WSA_FLAG_NO_HANDLE_INHERIT is only supported on Windows 7 SP1 + * and above the socket creation fails on older Windows Systems. + * + * Let's try to create a socket the old way in this case. + * + * Reference: + * https://docs.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsasocketa + * + * WSA_FLAG_NO_HANDLE_INHERIT: + * This flag is supported on Windows 7 with SP1, Windows Server 2008 R2 with + * SP1, and later + * + */ + if (sock == INVALID_SOCKET) { + sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); + } +#else + auto sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); +#endif + if (sock == INVALID_SOCKET) { continue; } + +#ifndef _WIN32 + if (fcntl(sock, F_SETFD, FD_CLOEXEC) == -1) { + close_socket(sock); + continue; + } +#endif + + if (tcp_nodelay) { + int yes = 1; + setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, reinterpret_cast(&yes), + sizeof(yes)); + } + + if (socket_options) { socket_options(sock); } + + if (rp->ai_family == AF_INET6) { + int no = 0; + setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, reinterpret_cast(&no), + sizeof(no)); + } + + // bind or connect + if (bind_or_connect(sock, *rp)) { + freeaddrinfo(result); + return sock; + } + + close_socket(sock); + } + + freeaddrinfo(result); + return INVALID_SOCKET; +} + +inline void set_nonblocking(socket_t sock, bool nonblocking) { +#ifdef _WIN32 + auto flags = nonblocking ? 1UL : 0UL; + ioctlsocket(sock, FIONBIO, &flags); +#else + auto flags = fcntl(sock, F_GETFL, 0); + fcntl(sock, F_SETFL, + nonblocking ? (flags | O_NONBLOCK) : (flags & (~O_NONBLOCK))); +#endif +} + +inline bool is_connection_error() { +#ifdef _WIN32 + return WSAGetLastError() != WSAEWOULDBLOCK; +#else + return errno != EINPROGRESS; +#endif +} + +inline bool bind_ip_address(socket_t sock, const std::string &host) { + struct addrinfo hints; + struct addrinfo *result; + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + + if (getaddrinfo(host.c_str(), "0", &hints, &result)) { return false; } + + auto ret = false; + for (auto rp = result; rp; rp = rp->ai_next) { + const auto &ai = *rp; + if (!::bind(sock, ai.ai_addr, static_cast(ai.ai_addrlen))) { + ret = true; + break; + } + } + + freeaddrinfo(result); + return ret; +} + +#if !defined _WIN32 && !defined ANDROID && !defined _AIX +#define USE_IF2IP +#endif + +#ifdef USE_IF2IP +inline std::string if2ip(int address_family, const std::string &ifn) { + struct ifaddrs *ifap; + getifaddrs(&ifap); + std::string addr_candidate; + for (auto ifa = ifap; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_addr && ifn == ifa->ifa_name && + (AF_UNSPEC == address_family || + ifa->ifa_addr->sa_family == address_family)) { + if (ifa->ifa_addr->sa_family == AF_INET) { + auto sa = reinterpret_cast(ifa->ifa_addr); + char buf[INET_ADDRSTRLEN]; + if (inet_ntop(AF_INET, &sa->sin_addr, buf, INET_ADDRSTRLEN)) { + freeifaddrs(ifap); + return std::string(buf, INET_ADDRSTRLEN); + } + } else if (ifa->ifa_addr->sa_family == AF_INET6) { + auto sa = reinterpret_cast(ifa->ifa_addr); + if (!IN6_IS_ADDR_LINKLOCAL(&sa->sin6_addr)) { + char buf[INET6_ADDRSTRLEN] = {}; + if (inet_ntop(AF_INET6, &sa->sin6_addr, buf, INET6_ADDRSTRLEN)) { + // equivalent to mac's IN6_IS_ADDR_UNIQUE_LOCAL + auto s6_addr_head = sa->sin6_addr.s6_addr[0]; + if (s6_addr_head == 0xfc || s6_addr_head == 0xfd) { + addr_candidate = std::string(buf, INET6_ADDRSTRLEN); + } else { + freeifaddrs(ifap); + return std::string(buf, INET6_ADDRSTRLEN); + } + } + } + } + } + } + freeifaddrs(ifap); + return addr_candidate; +} +#endif + +inline socket_t create_client_socket( + const std::string &host, const std::string &ip, int port, + int address_family, bool tcp_nodelay, SocketOptions socket_options, + time_t connection_timeout_sec, time_t connection_timeout_usec, + time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, const std::string &intf, Error &error) { + auto sock = create_socket( + host, ip, port, address_family, 0, tcp_nodelay, std::move(socket_options), + [&](socket_t sock2, struct addrinfo &ai) -> bool { + if (!intf.empty()) { +#ifdef USE_IF2IP + auto ip_from_if = if2ip(address_family, intf); + if (ip_from_if.empty()) { ip_from_if = intf; } + if (!bind_ip_address(sock2, ip_from_if.c_str())) { + error = Error::BindIPAddress; + return false; + } +#endif + } + + set_nonblocking(sock2, true); + + auto ret = + ::connect(sock2, ai.ai_addr, static_cast(ai.ai_addrlen)); + + if (ret < 0) { + if (is_connection_error()) { + error = Error::Connection; + return false; + } + error = wait_until_socket_is_ready(sock2, connection_timeout_sec, + connection_timeout_usec); + if (error != Error::Success) { return false; } + } + + set_nonblocking(sock2, false); + + { +#ifdef _WIN32 + auto timeout = static_cast(read_timeout_sec * 1000 + + read_timeout_usec / 1000); + setsockopt(sock2, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)); +#else + timeval tv; + tv.tv_sec = static_cast(read_timeout_sec); + tv.tv_usec = static_cast(read_timeout_usec); + setsockopt(sock2, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)); +#endif + } + { + +#ifdef _WIN32 + auto timeout = static_cast(write_timeout_sec * 1000 + + write_timeout_usec / 1000); + setsockopt(sock2, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, + sizeof(timeout)); +#else + timeval tv; + tv.tv_sec = static_cast(write_timeout_sec); + tv.tv_usec = static_cast(write_timeout_usec); + setsockopt(sock2, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, sizeof(tv)); +#endif + } + + error = Error::Success; + return true; + }); + + if (sock != INVALID_SOCKET) { + error = Error::Success; + } else { + if (error == Error::Success) { error = Error::Connection; } + } + + return sock; +} + +inline bool get_ip_and_port(const struct sockaddr_storage &addr, + socklen_t addr_len, std::string &ip, int &port) { + if (addr.ss_family == AF_INET) { + port = ntohs(reinterpret_cast(&addr)->sin_port); + } else if (addr.ss_family == AF_INET6) { + port = + ntohs(reinterpret_cast(&addr)->sin6_port); + } else { + return false; + } + + std::array ipstr{}; + if (getnameinfo(reinterpret_cast(&addr), addr_len, + ipstr.data(), static_cast(ipstr.size()), nullptr, + 0, NI_NUMERICHOST)) { + return false; + } + + ip = ipstr.data(); + return true; +} + +inline void get_local_ip_and_port(socket_t sock, std::string &ip, int &port) { + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + if (!getsockname(sock, reinterpret_cast(&addr), + &addr_len)) { + get_ip_and_port(addr, addr_len, ip, port); + } +} + +inline void get_remote_ip_and_port(socket_t sock, std::string &ip, int &port) { + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + + if (!getpeername(sock, reinterpret_cast(&addr), + &addr_len)) { +#ifndef _WIN32 + if (addr.ss_family == AF_UNIX) { +#if defined(__linux__) + struct ucred ucred; + socklen_t len = sizeof(ucred); + if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &len) == 0) { + port = ucred.pid; + } +#elif defined(SOL_LOCAL) && defined(SO_PEERPID) // __APPLE__ + pid_t pid; + socklen_t len = sizeof(pid); + if (getsockopt(sock, SOL_LOCAL, SO_PEERPID, &pid, &len) == 0) { + port = pid; + } +#endif + return; + } +#endif + get_ip_and_port(addr, addr_len, ip, port); + } +} + +inline constexpr unsigned int str2tag_core(const char *s, size_t l, + unsigned int h) { + return (l == 0) + ? h + : str2tag_core( + s + 1, l - 1, + // Unsets the 6 high bits of h, therefore no overflow happens + (((std::numeric_limits::max)() >> 6) & + h * 33) ^ + static_cast(*s)); +} + +inline unsigned int str2tag(const std::string &s) { + return str2tag_core(s.data(), s.size(), 0); +} + +namespace udl { + +inline constexpr unsigned int operator"" _t(const char *s, size_t l) { + return str2tag_core(s, l, 0); +} + +} // namespace udl + +inline const char * +find_content_type(const std::string &path, + const std::map &user_data) { + auto ext = file_extension(path); + + auto it = user_data.find(ext); + if (it != user_data.end()) { return it->second.c_str(); } + + using udl::operator""_t; + + switch (str2tag(ext)) { + default: return nullptr; + case "css"_t: return "text/css"; + case "csv"_t: return "text/csv"; + case "htm"_t: + case "html"_t: return "text/html"; + case "js"_t: + case "mjs"_t: return "text/javascript"; + case "txt"_t: return "text/plain"; + case "vtt"_t: return "text/vtt"; + + case "apng"_t: return "image/apng"; + case "avif"_t: return "image/avif"; + case "bmp"_t: return "image/bmp"; + case "gif"_t: return "image/gif"; + case "png"_t: return "image/png"; + case "svg"_t: return "image/svg+xml"; + case "webp"_t: return "image/webp"; + case "ico"_t: return "image/x-icon"; + case "tif"_t: return "image/tiff"; + case "tiff"_t: return "image/tiff"; + case "jpg"_t: + case "jpeg"_t: return "image/jpeg"; + + case "mp4"_t: return "video/mp4"; + case "mpeg"_t: return "video/mpeg"; + case "webm"_t: return "video/webm"; + + case "mp3"_t: return "audio/mp3"; + case "mpga"_t: return "audio/mpeg"; + case "weba"_t: return "audio/webm"; + case "wav"_t: return "audio/wave"; + + case "otf"_t: return "font/otf"; + case "ttf"_t: return "font/ttf"; + case "woff"_t: return "font/woff"; + case "woff2"_t: return "font/woff2"; + + case "7z"_t: return "application/x-7z-compressed"; + case "atom"_t: return "application/atom+xml"; + case "pdf"_t: return "application/pdf"; + case "json"_t: return "application/json"; + case "rss"_t: return "application/rss+xml"; + case "tar"_t: return "application/x-tar"; + case "xht"_t: + case "xhtml"_t: return "application/xhtml+xml"; + case "xslt"_t: return "application/xslt+xml"; + case "xml"_t: return "application/xml"; + case "gz"_t: return "application/gzip"; + case "zip"_t: return "application/zip"; + case "wasm"_t: return "application/wasm"; + } +} + +inline const char *status_message(int status) { + switch (status) { + case 100: return "Continue"; + case 101: return "Switching Protocol"; + case 102: return "Processing"; + case 103: return "Early Hints"; + case 200: return "OK"; + case 201: return "Created"; + case 202: return "Accepted"; + case 203: return "Non-Authoritative Information"; + case 204: return "No Content"; + case 205: return "Reset Content"; + case 206: return "Partial Content"; + case 207: return "Multi-Status"; + case 208: return "Already Reported"; + case 226: return "IM Used"; + case 300: return "Multiple Choice"; + case 301: return "Moved Permanently"; + case 302: return "Found"; + case 303: return "See Other"; + case 304: return "Not Modified"; + case 305: return "Use Proxy"; + case 306: return "unused"; + case 307: return "Temporary Redirect"; + case 308: return "Permanent Redirect"; + case 400: return "Bad Request"; + case 401: return "Unauthorized"; + case 402: return "Payment Required"; + case 403: return "Forbidden"; + case 404: return "Not Found"; + case 405: return "Method Not Allowed"; + case 406: return "Not Acceptable"; + case 407: return "Proxy Authentication Required"; + case 408: return "Request Timeout"; + case 409: return "Conflict"; + case 410: return "Gone"; + case 411: return "Length Required"; + case 412: return "Precondition Failed"; + case 413: return "Payload Too Large"; + case 414: return "URI Too Long"; + case 415: return "Unsupported Media Type"; + case 416: return "Range Not Satisfiable"; + case 417: return "Expectation Failed"; + case 418: return "I'm a teapot"; + case 421: return "Misdirected Request"; + case 422: return "Unprocessable Entity"; + case 423: return "Locked"; + case 424: return "Failed Dependency"; + case 425: return "Too Early"; + case 426: return "Upgrade Required"; + case 428: return "Precondition Required"; + case 429: return "Too Many Requests"; + case 431: return "Request Header Fields Too Large"; + case 451: return "Unavailable For Legal Reasons"; + case 501: return "Not Implemented"; + case 502: return "Bad Gateway"; + case 503: return "Service Unavailable"; + case 504: return "Gateway Timeout"; + case 505: return "HTTP Version Not Supported"; + case 506: return "Variant Also Negotiates"; + case 507: return "Insufficient Storage"; + case 508: return "Loop Detected"; + case 510: return "Not Extended"; + case 511: return "Network Authentication Required"; + + default: + case 500: return "Internal Server Error"; + } +} + +inline bool can_compress_content_type(const std::string &content_type) { + using udl::operator""_t; + + auto tag = str2tag(content_type); + + switch (tag) { + case "image/svg+xml"_t: + case "application/javascript"_t: + case "application/json"_t: + case "application/xml"_t: + case "application/protobuf"_t: + case "application/xhtml+xml"_t: return true; + + default: + return !content_type.rfind("text/", 0) && tag != "text/event-stream"_t; + } +} + +inline EncodingType encoding_type(const Request &req, const Response &res) { + auto ret = + detail::can_compress_content_type(res.get_header_value("Content-Type")); + if (!ret) { return EncodingType::None; } + + const auto &s = req.get_header_value("Accept-Encoding"); + (void)(s); + +#ifdef CPPHTTPLIB_BROTLI_SUPPORT + // TODO: 'Accept-Encoding' has br, not br;q=0 + ret = s.find("br") != std::string::npos; + if (ret) { return EncodingType::Brotli; } +#endif + +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + // TODO: 'Accept-Encoding' has gzip, not gzip;q=0 + ret = s.find("gzip") != std::string::npos; + if (ret) { return EncodingType::Gzip; } +#endif + + return EncodingType::None; +} + +inline bool nocompressor::compress(const char *data, size_t data_length, + bool /*last*/, Callback callback) { + if (!data_length) { return true; } + return callback(data, data_length); +} + +#ifdef CPPHTTPLIB_ZLIB_SUPPORT +inline gzip_compressor::gzip_compressor() { + std::memset(&strm_, 0, sizeof(strm_)); + strm_.zalloc = Z_NULL; + strm_.zfree = Z_NULL; + strm_.opaque = Z_NULL; + + is_valid_ = deflateInit2(&strm_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8, + Z_DEFAULT_STRATEGY) == Z_OK; +} + +inline gzip_compressor::~gzip_compressor() { deflateEnd(&strm_); } + +inline bool gzip_compressor::compress(const char *data, size_t data_length, + bool last, Callback callback) { + assert(is_valid_); + + do { + constexpr size_t max_avail_in = + (std::numeric_limits::max)(); + + strm_.avail_in = static_cast( + (std::min)(data_length, max_avail_in)); + strm_.next_in = const_cast(reinterpret_cast(data)); + + data_length -= strm_.avail_in; + data += strm_.avail_in; + + auto flush = (last && data_length == 0) ? Z_FINISH : Z_NO_FLUSH; + int ret = Z_OK; + + std::array buff{}; + do { + strm_.avail_out = static_cast(buff.size()); + strm_.next_out = reinterpret_cast(buff.data()); + + ret = deflate(&strm_, flush); + if (ret == Z_STREAM_ERROR) { return false; } + + if (!callback(buff.data(), buff.size() - strm_.avail_out)) { + return false; + } + } while (strm_.avail_out == 0); + + assert((flush == Z_FINISH && ret == Z_STREAM_END) || + (flush == Z_NO_FLUSH && ret == Z_OK)); + assert(strm_.avail_in == 0); + } while (data_length > 0); + + return true; +} + +inline gzip_decompressor::gzip_decompressor() { + std::memset(&strm_, 0, sizeof(strm_)); + strm_.zalloc = Z_NULL; + strm_.zfree = Z_NULL; + strm_.opaque = Z_NULL; + + // 15 is the value of wbits, which should be at the maximum possible value + // to ensure that any gzip stream can be decoded. The offset of 32 specifies + // that the stream type should be automatically detected either gzip or + // deflate. + is_valid_ = inflateInit2(&strm_, 32 + 15) == Z_OK; +} + +inline gzip_decompressor::~gzip_decompressor() { inflateEnd(&strm_); } + +inline bool gzip_decompressor::is_valid() const { return is_valid_; } + +inline bool gzip_decompressor::decompress(const char *data, size_t data_length, + Callback callback) { + assert(is_valid_); + + int ret = Z_OK; + + do { + constexpr size_t max_avail_in = + (std::numeric_limits::max)(); + + strm_.avail_in = static_cast( + (std::min)(data_length, max_avail_in)); + strm_.next_in = const_cast(reinterpret_cast(data)); + + data_length -= strm_.avail_in; + data += strm_.avail_in; + + std::array buff{}; + while (strm_.avail_in > 0) { + strm_.avail_out = static_cast(buff.size()); + strm_.next_out = reinterpret_cast(buff.data()); + + auto prev_avail_in = strm_.avail_in; + + ret = inflate(&strm_, Z_NO_FLUSH); + + if (prev_avail_in - strm_.avail_in == 0) { return false; } + + assert(ret != Z_STREAM_ERROR); + switch (ret) { + case Z_NEED_DICT: + case Z_DATA_ERROR: + case Z_MEM_ERROR: inflateEnd(&strm_); return false; + } + + if (!callback(buff.data(), buff.size() - strm_.avail_out)) { + return false; + } + } + + if (ret != Z_OK && ret != Z_STREAM_END) return false; + + } while (data_length > 0); + + return true; +} +#endif + +#ifdef CPPHTTPLIB_BROTLI_SUPPORT +inline brotli_compressor::brotli_compressor() { + state_ = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); +} + +inline brotli_compressor::~brotli_compressor() { + BrotliEncoderDestroyInstance(state_); +} + +inline bool brotli_compressor::compress(const char *data, size_t data_length, + bool last, Callback callback) { + std::array buff{}; + + auto operation = last ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS; + auto available_in = data_length; + auto next_in = reinterpret_cast(data); + + for (;;) { + if (last) { + if (BrotliEncoderIsFinished(state_)) { break; } + } else { + if (!available_in) { break; } + } + + auto available_out = buff.size(); + auto next_out = buff.data(); + + if (!BrotliEncoderCompressStream(state_, operation, &available_in, &next_in, + &available_out, &next_out, nullptr)) { + return false; + } + + auto output_bytes = buff.size() - available_out; + if (output_bytes) { + callback(reinterpret_cast(buff.data()), output_bytes); + } + } + + return true; +} + +inline brotli_decompressor::brotli_decompressor() { + decoder_s = BrotliDecoderCreateInstance(0, 0, 0); + decoder_r = decoder_s ? BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT + : BROTLI_DECODER_RESULT_ERROR; +} + +inline brotli_decompressor::~brotli_decompressor() { + if (decoder_s) { BrotliDecoderDestroyInstance(decoder_s); } +} + +inline bool brotli_decompressor::is_valid() const { return decoder_s; } + +inline bool brotli_decompressor::decompress(const char *data, + size_t data_length, + Callback callback) { + if (decoder_r == BROTLI_DECODER_RESULT_SUCCESS || + decoder_r == BROTLI_DECODER_RESULT_ERROR) { + return 0; + } + + const uint8_t *next_in = (const uint8_t *)data; + size_t avail_in = data_length; + size_t total_out; + + decoder_r = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT; + + std::array buff{}; + while (decoder_r == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { + char *next_out = buff.data(); + size_t avail_out = buff.size(); + + decoder_r = BrotliDecoderDecompressStream( + decoder_s, &avail_in, &next_in, &avail_out, + reinterpret_cast(&next_out), &total_out); + + if (decoder_r == BROTLI_DECODER_RESULT_ERROR) { return false; } + + if (!callback(buff.data(), buff.size() - avail_out)) { return false; } + } + + return decoder_r == BROTLI_DECODER_RESULT_SUCCESS || + decoder_r == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT; +} +#endif + +inline bool has_header(const Headers &headers, const std::string &key) { + return headers.find(key) != headers.end(); +} + +inline const char *get_header_value(const Headers &headers, + const std::string &key, size_t id, + const char *def) { + auto rng = headers.equal_range(key); + auto it = rng.first; + std::advance(it, static_cast(id)); + if (it != rng.second) { return it->second.c_str(); } + return def; +} + +inline bool compare_case_ignore(const std::string &a, const std::string &b) { + if (a.size() != b.size()) { return false; } + for (size_t i = 0; i < b.size(); i++) { + if (::tolower(a[i]) != ::tolower(b[i])) { return false; } + } + return true; +} + +template +inline bool parse_header(const char *beg, const char *end, T fn) { + // Skip trailing spaces and tabs. + while (beg < end && is_space_or_tab(end[-1])) { + end--; + } + + auto p = beg; + while (p < end && *p != ':') { + p++; + } + + if (p == end) { return false; } + + auto key_end = p; + + if (*p++ != ':') { return false; } + + while (p < end && is_space_or_tab(*p)) { + p++; + } + + if (p < end) { + auto key = std::string(beg, key_end); + auto val = compare_case_ignore(key, "Location") + ? std::string(p, end) + : decode_url(std::string(p, end), false); + fn(std::move(key), std::move(val)); + return true; + } + + return false; +} + +inline bool read_headers(Stream &strm, Headers &headers) { + const auto bufsiz = 2048; + char buf[bufsiz]; + stream_line_reader line_reader(strm, buf, bufsiz); + + for (;;) { + if (!line_reader.getline()) { return false; } + + // Check if the line ends with CRLF. + auto line_terminator_len = 2; + if (line_reader.end_with_crlf()) { + // Blank line indicates end of headers. + if (line_reader.size() == 2) { break; } +#ifdef CPPHTTPLIB_ALLOW_LF_AS_LINE_TERMINATOR + } else { + // Blank line indicates end of headers. + if (line_reader.size() == 1) { break; } + line_terminator_len = 1; + } +#else + } else { + continue; // Skip invalid line. + } +#endif + + if (line_reader.size() > CPPHTTPLIB_HEADER_MAX_LENGTH) { return false; } + + // Exclude line terminator + auto end = line_reader.ptr() + line_reader.size() - line_terminator_len; + + parse_header(line_reader.ptr(), end, + [&](std::string &&key, std::string &&val) { + headers.emplace(std::move(key), std::move(val)); + }); + } + + return true; +} + +inline bool read_content_with_length(Stream &strm, uint64_t len, + Progress progress, + ContentReceiverWithProgress out) { + char buf[CPPHTTPLIB_RECV_BUFSIZ]; + + uint64_t r = 0; + while (r < len) { + auto read_len = static_cast(len - r); + auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ)); + if (n <= 0) { return false; } + + if (!out(buf, static_cast(n), r, len)) { return false; } + r += static_cast(n); + + if (progress) { + if (!progress(r, len)) { return false; } + } + } + + return true; +} + +inline void skip_content_with_length(Stream &strm, uint64_t len) { + char buf[CPPHTTPLIB_RECV_BUFSIZ]; + uint64_t r = 0; + while (r < len) { + auto read_len = static_cast(len - r); + auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ)); + if (n <= 0) { return; } + r += static_cast(n); + } +} + +inline bool read_content_without_length(Stream &strm, + ContentReceiverWithProgress out) { + char buf[CPPHTTPLIB_RECV_BUFSIZ]; + uint64_t r = 0; + for (;;) { + auto n = strm.read(buf, CPPHTTPLIB_RECV_BUFSIZ); + if (n < 0) { + return false; + } else if (n == 0) { + return true; + } + + if (!out(buf, static_cast(n), r, 0)) { return false; } + r += static_cast(n); + } + + return true; +} + +template +inline bool read_content_chunked(Stream &strm, T &x, + ContentReceiverWithProgress out) { + const auto bufsiz = 16; + char buf[bufsiz]; + + stream_line_reader line_reader(strm, buf, bufsiz); + + if (!line_reader.getline()) { return false; } + + unsigned long chunk_len; + while (true) { + char *end_ptr; + + chunk_len = std::strtoul(line_reader.ptr(), &end_ptr, 16); + + if (end_ptr == line_reader.ptr()) { return false; } + if (chunk_len == ULONG_MAX) { return false; } + + if (chunk_len == 0) { break; } + + if (!read_content_with_length(strm, chunk_len, nullptr, out)) { + return false; + } + + if (!line_reader.getline()) { return false; } + + if (strcmp(line_reader.ptr(), "\r\n")) { return false; } + + if (!line_reader.getline()) { return false; } + } + + assert(chunk_len == 0); + + // Trailer + if (!line_reader.getline()) { return false; } + + while (strcmp(line_reader.ptr(), "\r\n")) { + if (line_reader.size() > CPPHTTPLIB_HEADER_MAX_LENGTH) { return false; } + + // Exclude line terminator + constexpr auto line_terminator_len = 2; + auto end = line_reader.ptr() + line_reader.size() - line_terminator_len; + + parse_header(line_reader.ptr(), end, + [&](std::string &&key, std::string &&val) { + x.headers.emplace(std::move(key), std::move(val)); + }); + + if (!line_reader.getline()) { return false; } + } + + return true; +} + +inline bool is_chunked_transfer_encoding(const Headers &headers) { + return !strcasecmp(get_header_value(headers, "Transfer-Encoding", 0, ""), + "chunked"); +} + +template +bool prepare_content_receiver(T &x, int &status, + ContentReceiverWithProgress receiver, + bool decompress, U callback) { + if (decompress) { + std::string encoding = x.get_header_value("Content-Encoding"); + std::unique_ptr decompressor; + + if (encoding == "gzip" || encoding == "deflate") { +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + decompressor = detail::make_unique(); +#else + status = 415; + return false; +#endif + } else if (encoding.find("br") != std::string::npos) { +#ifdef CPPHTTPLIB_BROTLI_SUPPORT + decompressor = detail::make_unique(); +#else + status = 415; + return false; +#endif + } + + if (decompressor) { + if (decompressor->is_valid()) { + ContentReceiverWithProgress out = [&](const char *buf, size_t n, + uint64_t off, uint64_t len) { + return decompressor->decompress(buf, n, + [&](const char *buf2, size_t n2) { + return receiver(buf2, n2, off, len); + }); + }; + return callback(std::move(out)); + } else { + status = 500; + return false; + } + } + } + + ContentReceiverWithProgress out = [&](const char *buf, size_t n, uint64_t off, + uint64_t len) { + return receiver(buf, n, off, len); + }; + return callback(std::move(out)); +} + +template +bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, + Progress progress, ContentReceiverWithProgress receiver, + bool decompress) { + return prepare_content_receiver( + x, status, std::move(receiver), decompress, + [&](const ContentReceiverWithProgress &out) { + auto ret = true; + auto exceed_payload_max_length = false; + + if (is_chunked_transfer_encoding(x.headers)) { + ret = read_content_chunked(strm, x, out); + } else if (!has_header(x.headers, "Content-Length")) { + ret = read_content_without_length(strm, out); + } else { + auto len = get_header_value(x.headers, "Content-Length"); + if (len > payload_max_length) { + exceed_payload_max_length = true; + skip_content_with_length(strm, len); + ret = false; + } else if (len > 0) { + ret = read_content_with_length(strm, len, std::move(progress), out); + } + } + + if (!ret) { status = exceed_payload_max_length ? 413 : 400; } + return ret; + }); +} // namespace detail + +inline ssize_t write_headers(Stream &strm, const Headers &headers) { + ssize_t write_len = 0; + for (const auto &x : headers) { + auto len = + strm.write_format("%s: %s\r\n", x.first.c_str(), x.second.c_str()); + if (len < 0) { return len; } + write_len += len; + } + auto len = strm.write("\r\n"); + if (len < 0) { return len; } + write_len += len; + return write_len; +} + +inline bool write_data(Stream &strm, const char *d, size_t l) { + size_t offset = 0; + while (offset < l) { + auto length = strm.write(d + offset, l - offset); + if (length < 0) { return false; } + offset += static_cast(length); + } + return true; +} + +template +inline bool write_content(Stream &strm, const ContentProvider &content_provider, + size_t offset, size_t length, T is_shutting_down, + Error &error) { + size_t end_offset = offset + length; + auto ok = true; + DataSink data_sink; + + data_sink.write = [&](const char *d, size_t l) -> bool { + if (ok) { + if (strm.is_writable() && write_data(strm, d, l)) { + offset += l; + } else { + ok = false; + } + } + return ok; + }; + + while (offset < end_offset && !is_shutting_down()) { + if (!strm.is_writable()) { + error = Error::Write; + return false; + } else if (!content_provider(offset, end_offset - offset, data_sink)) { + error = Error::Canceled; + return false; + } else if (!ok) { + error = Error::Write; + return false; + } + } + + error = Error::Success; + return true; +} + +template +inline bool write_content(Stream &strm, const ContentProvider &content_provider, + size_t offset, size_t length, + const T &is_shutting_down) { + auto error = Error::Success; + return write_content(strm, content_provider, offset, length, is_shutting_down, + error); +} + +template +inline bool +write_content_without_length(Stream &strm, + const ContentProvider &content_provider, + const T &is_shutting_down) { + size_t offset = 0; + auto data_available = true; + auto ok = true; + DataSink data_sink; + + data_sink.write = [&](const char *d, size_t l) -> bool { + if (ok) { + offset += l; + if (!strm.is_writable() || !write_data(strm, d, l)) { ok = false; } + } + return ok; + }; + + data_sink.done = [&](void) { data_available = false; }; + + while (data_available && !is_shutting_down()) { + if (!strm.is_writable()) { + return false; + } else if (!content_provider(offset, 0, data_sink)) { + return false; + } else if (!ok) { + return false; + } + } + return true; +} + +template +inline bool +write_content_chunked(Stream &strm, const ContentProvider &content_provider, + const T &is_shutting_down, U &compressor, Error &error) { + size_t offset = 0; + auto data_available = true; + auto ok = true; + DataSink data_sink; + + data_sink.write = [&](const char *d, size_t l) -> bool { + if (ok) { + data_available = l > 0; + offset += l; + + std::string payload; + if (compressor.compress(d, l, false, + [&](const char *data, size_t data_len) { + payload.append(data, data_len); + return true; + })) { + if (!payload.empty()) { + // Emit chunked response header and footer for each chunk + auto chunk = + from_i_to_hex(payload.size()) + "\r\n" + payload + "\r\n"; + if (!strm.is_writable() || + !write_data(strm, chunk.data(), chunk.size())) { + ok = false; + } + } + } else { + ok = false; + } + } + return ok; + }; + + auto done_with_trailer = [&](const Headers *trailer) { + if (!ok) { return; } + + data_available = false; + + std::string payload; + if (!compressor.compress(nullptr, 0, true, + [&](const char *data, size_t data_len) { + payload.append(data, data_len); + return true; + })) { + ok = false; + return; + } + + if (!payload.empty()) { + // Emit chunked response header and footer for each chunk + auto chunk = from_i_to_hex(payload.size()) + "\r\n" + payload + "\r\n"; + if (!strm.is_writable() || + !write_data(strm, chunk.data(), chunk.size())) { + ok = false; + return; + } + } + + static const std::string done_marker("0\r\n"); + if (!write_data(strm, done_marker.data(), done_marker.size())) { + ok = false; + } + + // Trailer + if (trailer) { + for (const auto &kv : *trailer) { + std::string field_line = kv.first + ": " + kv.second + "\r\n"; + if (!write_data(strm, field_line.data(), field_line.size())) { + ok = false; + } + } + } + + static const std::string crlf("\r\n"); + if (!write_data(strm, crlf.data(), crlf.size())) { ok = false; } + }; + + data_sink.done = [&](void) { done_with_trailer(nullptr); }; + + data_sink.done_with_trailer = [&](const Headers &trailer) { + done_with_trailer(&trailer); + }; + + while (data_available && !is_shutting_down()) { + if (!strm.is_writable()) { + error = Error::Write; + return false; + } else if (!content_provider(offset, 0, data_sink)) { + error = Error::Canceled; + return false; + } else if (!ok) { + error = Error::Write; + return false; + } + } + + error = Error::Success; + return true; +} + +template +inline bool write_content_chunked(Stream &strm, + const ContentProvider &content_provider, + const T &is_shutting_down, U &compressor) { + auto error = Error::Success; + return write_content_chunked(strm, content_provider, is_shutting_down, + compressor, error); +} + +template +inline bool redirect(T &cli, Request &req, Response &res, + const std::string &path, const std::string &location, + Error &error) { + Request new_req = req; + new_req.path = path; + new_req.redirect_count_ -= 1; + + if (res.status == 303 && (req.method != "GET" && req.method != "HEAD")) { + new_req.method = "GET"; + new_req.body.clear(); + new_req.headers.clear(); + } + + Response new_res; + + auto ret = cli.send(new_req, new_res, error); + if (ret) { + req = new_req; + res = new_res; + res.location = location; + } + return ret; +} + +inline std::string params_to_query_str(const Params ¶ms) { + std::string query; + + for (auto it = params.begin(); it != params.end(); ++it) { + if (it != params.begin()) { query += "&"; } + query += it->first; + query += "="; + query += encode_query_param(it->second); + } + return query; +} + +inline void parse_query_text(const std::string &s, Params ¶ms) { + std::set cache; + split(s.data(), s.data() + s.size(), '&', [&](const char *b, const char *e) { + std::string kv(b, e); + if (cache.find(kv) != cache.end()) { return; } + cache.insert(kv); + + std::string key; + std::string val; + split(b, e, '=', [&](const char *b2, const char *e2) { + if (key.empty()) { + key.assign(b2, e2); + } else { + val.assign(b2, e2); + } + }); + + if (!key.empty()) { + params.emplace(decode_url(key, true), decode_url(val, true)); + } + }); +} + +inline bool parse_multipart_boundary(const std::string &content_type, + std::string &boundary) { + auto boundary_keyword = "boundary="; + auto pos = content_type.find(boundary_keyword); + if (pos == std::string::npos) { return false; } + auto end = content_type.find(';', pos); + auto beg = pos + strlen(boundary_keyword); + boundary = content_type.substr(beg, end - beg); + if (boundary.length() >= 2 && boundary.front() == '"' && + boundary.back() == '"') { + boundary = boundary.substr(1, boundary.size() - 2); + } + return !boundary.empty(); +} + +#ifdef CPPHTTPLIB_NO_EXCEPTIONS +inline bool parse_range_header(const std::string &s, Ranges &ranges) { +#else +inline bool parse_range_header(const std::string &s, Ranges &ranges) try { +#endif + static auto re_first_range = std::regex(R"(bytes=(\d*-\d*(?:,\s*\d*-\d*)*))"); + std::smatch m; + if (std::regex_match(s, m, re_first_range)) { + auto pos = static_cast(m.position(1)); + auto len = static_cast(m.length(1)); + bool all_valid_ranges = true; + split(&s[pos], &s[pos + len], ',', [&](const char *b, const char *e) { + if (!all_valid_ranges) return; + static auto re_another_range = std::regex(R"(\s*(\d*)-(\d*))"); + std::cmatch cm; + if (std::regex_match(b, e, cm, re_another_range)) { + ssize_t first = -1; + if (!cm.str(1).empty()) { + first = static_cast(std::stoll(cm.str(1))); + } + + ssize_t last = -1; + if (!cm.str(2).empty()) { + last = static_cast(std::stoll(cm.str(2))); + } + + if (first != -1 && last != -1 && first > last) { + all_valid_ranges = false; + return; + } + ranges.emplace_back(std::make_pair(first, last)); + } + }); + return all_valid_ranges; + } + return false; +#ifdef CPPHTTPLIB_NO_EXCEPTIONS +} +#else +} catch (...) { return false; } +#endif + +class MultipartFormDataParser { +public: + MultipartFormDataParser() = default; + + void set_boundary(std::string &&boundary) { + boundary_ = boundary; + dash_boundary_crlf_ = dash_ + boundary_ + crlf_; + crlf_dash_boundary_ = crlf_ + dash_ + boundary_; + } + + bool is_valid() const { return is_valid_; } + + bool parse(const char *buf, size_t n, const ContentReceiver &content_callback, + const MultipartContentHeader &header_callback) { + + // TODO: support 'filename*' + static const std::regex re_content_disposition( + R"~(^Content-Disposition:\s*form-data;\s*name="(.*?)"(?:;\s*filename="(.*?)")?(?:;\s*filename\*=\S+)?\s*$)~", + std::regex_constants::icase); + + buf_append(buf, n); + + while (buf_size() > 0) { + switch (state_) { + case 0: { // Initial boundary + buf_erase(buf_find(dash_boundary_crlf_)); + if (dash_boundary_crlf_.size() > buf_size()) { return true; } + if (!buf_start_with(dash_boundary_crlf_)) { return false; } + buf_erase(dash_boundary_crlf_.size()); + state_ = 1; + break; + } + case 1: { // New entry + clear_file_info(); + state_ = 2; + break; + } + case 2: { // Headers + auto pos = buf_find(crlf_); + if (pos > CPPHTTPLIB_HEADER_MAX_LENGTH) { return false; } + while (pos < buf_size()) { + // Empty line + if (pos == 0) { + if (!header_callback(file_)) { + is_valid_ = false; + return false; + } + buf_erase(crlf_.size()); + state_ = 3; + break; + } + + static const std::string header_name = "content-type:"; + const auto header = buf_head(pos); + if (start_with_case_ignore(header, header_name)) { + file_.content_type = trim_copy(header.substr(header_name.size())); + } else { + std::smatch m; + if (std::regex_match(header, m, re_content_disposition)) { + file_.name = m[1]; + file_.filename = m[2]; + } else { + is_valid_ = false; + return false; + } + } + buf_erase(pos + crlf_.size()); + pos = buf_find(crlf_); + } + if (state_ != 3) { return true; } + break; + } + case 3: { // Body + if (crlf_dash_boundary_.size() > buf_size()) { return true; } + auto pos = buf_find(crlf_dash_boundary_); + if (pos < buf_size()) { + if (!content_callback(buf_data(), pos)) { + is_valid_ = false; + return false; + } + buf_erase(pos + crlf_dash_boundary_.size()); + state_ = 4; + } else { + auto len = buf_size() - crlf_dash_boundary_.size(); + if (len > 0) { + if (!content_callback(buf_data(), len)) { + is_valid_ = false; + return false; + } + buf_erase(len); + } + return true; + } + break; + } + case 4: { // Boundary + if (crlf_.size() > buf_size()) { return true; } + if (buf_start_with(crlf_)) { + buf_erase(crlf_.size()); + state_ = 1; + } else { + if (dash_crlf_.size() > buf_size()) { return true; } + if (buf_start_with(dash_crlf_)) { + buf_erase(dash_crlf_.size()); + is_valid_ = true; + buf_erase(buf_size()); // Remove epilogue + } else { + return true; + } + } + break; + } + } + } + + return true; + } + +private: + void clear_file_info() { + file_.name.clear(); + file_.filename.clear(); + file_.content_type.clear(); + } + + bool start_with_case_ignore(const std::string &a, + const std::string &b) const { + if (a.size() < b.size()) { return false; } + for (size_t i = 0; i < b.size(); i++) { + if (::tolower(a[i]) != ::tolower(b[i])) { return false; } + } + return true; + } + + const std::string dash_ = "--"; + const std::string crlf_ = "\r\n"; + const std::string dash_crlf_ = "--\r\n"; + std::string boundary_; + std::string dash_boundary_crlf_; + std::string crlf_dash_boundary_; + + size_t state_ = 0; + bool is_valid_ = false; + MultipartFormData file_; + + // Buffer + bool start_with(const std::string &a, size_t spos, size_t epos, + const std::string &b) const { + if (epos - spos < b.size()) { return false; } + for (size_t i = 0; i < b.size(); i++) { + if (a[i + spos] != b[i]) { return false; } + } + return true; + } + + size_t buf_size() const { return buf_epos_ - buf_spos_; } + + const char *buf_data() const { return &buf_[buf_spos_]; } + + std::string buf_head(size_t l) const { return buf_.substr(buf_spos_, l); } + + bool buf_start_with(const std::string &s) const { + return start_with(buf_, buf_spos_, buf_epos_, s); + } + + size_t buf_find(const std::string &s) const { + auto c = s.front(); + + size_t off = buf_spos_; + while (off < buf_epos_) { + auto pos = off; + while (true) { + if (pos == buf_epos_) { return buf_size(); } + if (buf_[pos] == c) { break; } + pos++; + } + + auto remaining_size = buf_epos_ - pos; + if (s.size() > remaining_size) { return buf_size(); } + + if (start_with(buf_, pos, buf_epos_, s)) { return pos - buf_spos_; } + + off = pos + 1; + } + + return buf_size(); + } + + void buf_append(const char *data, size_t n) { + auto remaining_size = buf_size(); + if (remaining_size > 0 && buf_spos_ > 0) { + for (size_t i = 0; i < remaining_size; i++) { + buf_[i] = buf_[buf_spos_ + i]; + } + } + buf_spos_ = 0; + buf_epos_ = remaining_size; + + if (remaining_size + n > buf_.size()) { buf_.resize(remaining_size + n); } + + for (size_t i = 0; i < n; i++) { + buf_[buf_epos_ + i] = data[i]; + } + buf_epos_ += n; + } + + void buf_erase(size_t size) { buf_spos_ += size; } + + std::string buf_; + size_t buf_spos_ = 0; + size_t buf_epos_ = 0; +}; + +inline std::string to_lower(const char *beg, const char *end) { + std::string out; + auto it = beg; + while (it != end) { + out += static_cast(::tolower(*it)); + it++; + } + return out; +} + +inline std::string make_multipart_data_boundary() { + static const char data[] = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + // std::random_device might actually be deterministic on some + // platforms, but due to lack of support in the c++ standard library, + // doing better requires either some ugly hacks or breaking portability. + std::random_device seed_gen; + + // Request 128 bits of entropy for initialization + std::seed_seq seed_sequence{seed_gen(), seed_gen(), seed_gen(), seed_gen()}; + std::mt19937 engine(seed_sequence); + + std::string result = "--cpp-httplib-multipart-data-"; + + for (auto i = 0; i < 16; i++) { + result += data[engine() % (sizeof(data) - 1)]; + } + + return result; +} + +inline bool is_multipart_boundary_chars_valid(const std::string &boundary) { + auto valid = true; + for (size_t i = 0; i < boundary.size(); i++) { + auto c = boundary[i]; + if (!std::isalnum(c) && c != '-' && c != '_') { + valid = false; + break; + } + } + return valid; +} + +template +inline std::string +serialize_multipart_formdata_item_begin(const T &item, + const std::string &boundary) { + std::string body = "--" + boundary + "\r\n"; + body += "Content-Disposition: form-data; name=\"" + item.name + "\""; + if (!item.filename.empty()) { + body += "; filename=\"" + item.filename + "\""; + } + body += "\r\n"; + if (!item.content_type.empty()) { + body += "Content-Type: " + item.content_type + "\r\n"; + } + body += "\r\n"; + + return body; +} + +inline std::string serialize_multipart_formdata_item_end() { return "\r\n"; } + +inline std::string +serialize_multipart_formdata_finish(const std::string &boundary) { + return "--" + boundary + "--\r\n"; +} + +inline std::string +serialize_multipart_formdata_get_content_type(const std::string &boundary) { + return "multipart/form-data; boundary=" + boundary; +} + +inline std::string +serialize_multipart_formdata(const MultipartFormDataItems &items, + const std::string &boundary, bool finish = true) { + std::string body; + + for (const auto &item : items) { + body += serialize_multipart_formdata_item_begin(item, boundary); + body += item.content + serialize_multipart_formdata_item_end(); + } + + if (finish) body += serialize_multipart_formdata_finish(boundary); + + return body; +} + +inline std::pair +get_range_offset_and_length(const Request &req, size_t content_length, + size_t index) { + auto r = req.ranges[index]; + + if (r.first == -1 && r.second == -1) { + return std::make_pair(0, content_length); + } + + auto slen = static_cast(content_length); + + if (r.first == -1) { + r.first = (std::max)(static_cast(0), slen - r.second); + r.second = slen - 1; + } + + if (r.second == -1) { r.second = slen - 1; } + return std::make_pair(r.first, static_cast(r.second - r.first) + 1); +} + +inline std::string make_content_range_header_field(size_t offset, size_t length, + size_t content_length) { + std::string field = "bytes "; + field += std::to_string(offset); + field += "-"; + field += std::to_string(offset + length - 1); + field += "/"; + field += std::to_string(content_length); + return field; +} + +template +bool process_multipart_ranges_data(const Request &req, Response &res, + const std::string &boundary, + const std::string &content_type, + SToken stoken, CToken ctoken, + Content content) { + for (size_t i = 0; i < req.ranges.size(); i++) { + ctoken("--"); + stoken(boundary); + ctoken("\r\n"); + if (!content_type.empty()) { + ctoken("Content-Type: "); + stoken(content_type); + ctoken("\r\n"); + } + + auto offsets = get_range_offset_and_length(req, res.body.size(), i); + auto offset = offsets.first; + auto length = offsets.second; + + ctoken("Content-Range: "); + stoken(make_content_range_header_field(offset, length, res.body.size())); + ctoken("\r\n"); + ctoken("\r\n"); + if (!content(offset, length)) { return false; } + ctoken("\r\n"); + } + + ctoken("--"); + stoken(boundary); + ctoken("--\r\n"); + + return true; +} + +inline bool make_multipart_ranges_data(const Request &req, Response &res, + const std::string &boundary, + const std::string &content_type, + std::string &data) { + return process_multipart_ranges_data( + req, res, boundary, content_type, + [&](const std::string &token) { data += token; }, + [&](const std::string &token) { data += token; }, + [&](size_t offset, size_t length) { + if (offset < res.body.size()) { + data += res.body.substr(offset, length); + return true; + } + return false; + }); +} + +inline size_t +get_multipart_ranges_data_length(const Request &req, Response &res, + const std::string &boundary, + const std::string &content_type) { + size_t data_length = 0; + + process_multipart_ranges_data( + req, res, boundary, content_type, + [&](const std::string &token) { data_length += token.size(); }, + [&](const std::string &token) { data_length += token.size(); }, + [&](size_t /*offset*/, size_t length) { + data_length += length; + return true; + }); + + return data_length; +} + +template +inline bool write_multipart_ranges_data(Stream &strm, const Request &req, + Response &res, + const std::string &boundary, + const std::string &content_type, + const T &is_shutting_down) { + return process_multipart_ranges_data( + req, res, boundary, content_type, + [&](const std::string &token) { strm.write(token); }, + [&](const std::string &token) { strm.write(token); }, + [&](size_t offset, size_t length) { + return write_content(strm, res.content_provider_, offset, length, + is_shutting_down); + }); +} + +inline std::pair +get_range_offset_and_length(const Request &req, const Response &res, + size_t index) { + auto r = req.ranges[index]; + + if (r.second == -1) { + r.second = static_cast(res.content_length_) - 1; + } + + return std::make_pair(r.first, r.second - r.first + 1); +} + +inline bool expect_content(const Request &req) { + if (req.method == "POST" || req.method == "PUT" || req.method == "PATCH" || + req.method == "PRI" || req.method == "DELETE") { + return true; + } + // TODO: check if Content-Length is set + return false; +} + +inline bool has_crlf(const std::string &s) { + auto p = s.c_str(); + while (*p) { + if (*p == '\r' || *p == '\n') { return true; } + p++; + } + return false; +} + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline std::string message_digest(const std::string &s, const EVP_MD *algo) { + auto context = std::unique_ptr( + EVP_MD_CTX_new(), EVP_MD_CTX_free); + + unsigned int hash_length = 0; + unsigned char hash[EVP_MAX_MD_SIZE]; + + EVP_DigestInit_ex(context.get(), algo, nullptr); + EVP_DigestUpdate(context.get(), s.c_str(), s.size()); + EVP_DigestFinal_ex(context.get(), hash, &hash_length); + + std::stringstream ss; + for (auto i = 0u; i < hash_length; ++i) { + ss << std::hex << std::setw(2) << std::setfill('0') + << (unsigned int)hash[i]; + } + + return ss.str(); +} + +inline std::string MD5(const std::string &s) { + return message_digest(s, EVP_md5()); +} + +inline std::string SHA_256(const std::string &s) { + return message_digest(s, EVP_sha256()); +} + +inline std::string SHA_512(const std::string &s) { + return message_digest(s, EVP_sha512()); +} +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +#ifdef _WIN32 +// NOTE: This code came up with the following stackoverflow post: +// https://stackoverflow.com/questions/9507184/can-openssl-on-windows-use-the-system-certificate-store +inline bool load_system_certs_on_windows(X509_STORE *store) { + auto hStore = CertOpenSystemStoreW((HCRYPTPROV_LEGACY)NULL, L"ROOT"); + if (!hStore) { return false; } + + auto result = false; + PCCERT_CONTEXT pContext = NULL; + while ((pContext = CertEnumCertificatesInStore(hStore, pContext)) != + nullptr) { + auto encoded_cert = + static_cast(pContext->pbCertEncoded); + + auto x509 = d2i_X509(NULL, &encoded_cert, pContext->cbCertEncoded); + if (x509) { + X509_STORE_add_cert(store, x509); + X509_free(x509); + result = true; + } + } + + CertFreeCertificateContext(pContext); + CertCloseStore(hStore, 0); + + return result; +} +#elif defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) && defined(__APPLE__) +#if TARGET_OS_OSX +template +using CFObjectPtr = + std::unique_ptr::type, void (*)(CFTypeRef)>; + +inline void cf_object_ptr_deleter(CFTypeRef obj) { + if (obj) { CFRelease(obj); } +} + +inline bool retrieve_certs_from_keychain(CFObjectPtr &certs) { + CFStringRef keys[] = {kSecClass, kSecMatchLimit, kSecReturnRef}; + CFTypeRef values[] = {kSecClassCertificate, kSecMatchLimitAll, + kCFBooleanTrue}; + + CFObjectPtr query( + CFDictionaryCreate(nullptr, reinterpret_cast(keys), values, + sizeof(keys) / sizeof(keys[0]), + &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks), + cf_object_ptr_deleter); + + if (!query) { return false; } + + CFTypeRef security_items = nullptr; + if (SecItemCopyMatching(query.get(), &security_items) != errSecSuccess || + CFArrayGetTypeID() != CFGetTypeID(security_items)) { + return false; + } + + certs.reset(reinterpret_cast(security_items)); + return true; +} + +inline bool retrieve_root_certs_from_keychain(CFObjectPtr &certs) { + CFArrayRef root_security_items = nullptr; + if (SecTrustCopyAnchorCertificates(&root_security_items) != errSecSuccess) { + return false; + } + + certs.reset(root_security_items); + return true; +} + +inline bool add_certs_to_x509_store(CFArrayRef certs, X509_STORE *store) { + auto result = false; + for (int i = 0; i < CFArrayGetCount(certs); ++i) { + const auto cert = reinterpret_cast( + CFArrayGetValueAtIndex(certs, i)); + + if (SecCertificateGetTypeID() != CFGetTypeID(cert)) { continue; } + + CFDataRef cert_data = nullptr; + if (SecItemExport(cert, kSecFormatX509Cert, 0, nullptr, &cert_data) != + errSecSuccess) { + continue; + } + + CFObjectPtr cert_data_ptr(cert_data, cf_object_ptr_deleter); + + auto encoded_cert = static_cast( + CFDataGetBytePtr(cert_data_ptr.get())); + + auto x509 = + d2i_X509(NULL, &encoded_cert, CFDataGetLength(cert_data_ptr.get())); + + if (x509) { + X509_STORE_add_cert(store, x509); + X509_free(x509); + result = true; + } + } + + return result; +} + +inline bool load_system_certs_on_macos(X509_STORE *store) { + auto result = false; + CFObjectPtr certs(nullptr, cf_object_ptr_deleter); + if (retrieve_certs_from_keychain(certs) && certs) { + result = add_certs_to_x509_store(certs.get(), store); + } + + if (retrieve_root_certs_from_keychain(certs) && certs) { + result = add_certs_to_x509_store(certs.get(), store) || result; + } + + return result; +} +#endif // TARGET_OS_OSX +#endif // _WIN32 +#endif // CPPHTTPLIB_OPENSSL_SUPPORT + +#ifdef _WIN32 +class WSInit { +public: + WSInit() { + WSADATA wsaData; + if (WSAStartup(0x0002, &wsaData) == 0) is_valid_ = true; + } + + ~WSInit() { + if (is_valid_) WSACleanup(); + } + + bool is_valid_ = false; +}; + +static WSInit wsinit_; +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline std::pair make_digest_authentication_header( + const Request &req, const std::map &auth, + size_t cnonce_count, const std::string &cnonce, const std::string &username, + const std::string &password, bool is_proxy = false) { + std::string nc; + { + std::stringstream ss; + ss << std::setfill('0') << std::setw(8) << std::hex << cnonce_count; + nc = ss.str(); + } + + std::string qop; + if (auth.find("qop") != auth.end()) { + qop = auth.at("qop"); + if (qop.find("auth-int") != std::string::npos) { + qop = "auth-int"; + } else if (qop.find("auth") != std::string::npos) { + qop = "auth"; + } else { + qop.clear(); + } + } + + std::string algo = "MD5"; + if (auth.find("algorithm") != auth.end()) { algo = auth.at("algorithm"); } + + std::string response; + { + auto H = algo == "SHA-256" ? detail::SHA_256 + : algo == "SHA-512" ? detail::SHA_512 + : detail::MD5; + + auto A1 = username + ":" + auth.at("realm") + ":" + password; + + auto A2 = req.method + ":" + req.path; + if (qop == "auth-int") { A2 += ":" + H(req.body); } + + if (qop.empty()) { + response = H(H(A1) + ":" + auth.at("nonce") + ":" + H(A2)); + } else { + response = H(H(A1) + ":" + auth.at("nonce") + ":" + nc + ":" + cnonce + + ":" + qop + ":" + H(A2)); + } + } + + auto opaque = (auth.find("opaque") != auth.end()) ? auth.at("opaque") : ""; + + auto field = "Digest username=\"" + username + "\", realm=\"" + + auth.at("realm") + "\", nonce=\"" + auth.at("nonce") + + "\", uri=\"" + req.path + "\", algorithm=" + algo + + (qop.empty() ? ", response=\"" + : ", qop=" + qop + ", nc=" + nc + ", cnonce=\"" + + cnonce + "\", response=\"") + + response + "\"" + + (opaque.empty() ? "" : ", opaque=\"" + opaque + "\""); + + auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; + return std::make_pair(key, field); +} +#endif + +inline bool parse_www_authenticate(const Response &res, + std::map &auth, + bool is_proxy) { + auto auth_key = is_proxy ? "Proxy-Authenticate" : "WWW-Authenticate"; + if (res.has_header(auth_key)) { + static auto re = std::regex(R"~((?:(?:,\s*)?(.+?)=(?:"(.*?)"|([^,]*))))~"); + auto s = res.get_header_value(auth_key); + auto pos = s.find(' '); + if (pos != std::string::npos) { + auto type = s.substr(0, pos); + if (type == "Basic") { + return false; + } else if (type == "Digest") { + s = s.substr(pos + 1); + auto beg = std::sregex_iterator(s.begin(), s.end(), re); + for (auto i = beg; i != std::sregex_iterator(); ++i) { + auto m = *i; + auto key = s.substr(static_cast(m.position(1)), + static_cast(m.length(1))); + auto val = m.length(2) > 0 + ? s.substr(static_cast(m.position(2)), + static_cast(m.length(2))) + : s.substr(static_cast(m.position(3)), + static_cast(m.length(3))); + auth[key] = val; + } + return true; + } + } + } + return false; +} + +// https://stackoverflow.com/questions/440133/how-do-i-create-a-random-alpha-numeric-string-in-c/440240#answer-440240 +inline std::string random_string(size_t length) { + auto randchar = []() -> char { + const char charset[] = "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz"; + const size_t max_index = (sizeof(charset) - 1); + return charset[static_cast(std::rand()) % max_index]; + }; + std::string str(length, 0); + std::generate_n(str.begin(), length, randchar); + return str; +} + +class ContentProviderAdapter { +public: + explicit ContentProviderAdapter( + ContentProviderWithoutLength &&content_provider) + : content_provider_(content_provider) {} + + bool operator()(size_t offset, size_t, DataSink &sink) { + return content_provider_(offset, sink); + } + +private: + ContentProviderWithoutLength content_provider_; +}; + +} // namespace detail + +inline std::string hosted_at(const std::string &hostname) { + std::vector addrs; + hosted_at(hostname, addrs); + if (addrs.empty()) { return std::string(); } + return addrs[0]; +} + +inline void hosted_at(const std::string &hostname, + std::vector &addrs) { + struct addrinfo hints; + struct addrinfo *result; + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = 0; + + if (getaddrinfo(hostname.c_str(), nullptr, &hints, &result)) { +#if defined __linux__ && !defined __ANDROID__ + res_init(); +#endif + return; + } + + for (auto rp = result; rp; rp = rp->ai_next) { + const auto &addr = + *reinterpret_cast(rp->ai_addr); + std::string ip; + int dummy = -1; + if (detail::get_ip_and_port(addr, sizeof(struct sockaddr_storage), ip, + dummy)) { + addrs.push_back(ip); + } + } + + freeaddrinfo(result); +} + +inline std::string append_query_params(const std::string &path, + const Params ¶ms) { + std::string path_with_query = path; + const static std::regex re("[^?]+\\?.*"); + auto delm = std::regex_match(path, re) ? '&' : '?'; + path_with_query += delm + detail::params_to_query_str(params); + return path_with_query; +} + +// Header utilities +inline std::pair make_range_header(Ranges ranges) { + std::string field = "bytes="; + auto i = 0; + for (auto r : ranges) { + if (i != 0) { field += ", "; } + if (r.first != -1) { field += std::to_string(r.first); } + field += '-'; + if (r.second != -1) { field += std::to_string(r.second); } + i++; + } + return std::make_pair("Range", std::move(field)); +} + +inline std::pair +make_basic_authentication_header(const std::string &username, + const std::string &password, bool is_proxy) { + auto field = "Basic " + detail::base64_encode(username + ":" + password); + auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; + return std::make_pair(key, std::move(field)); +} + +inline std::pair +make_bearer_token_authentication_header(const std::string &token, + bool is_proxy = false) { + auto field = "Bearer " + token; + auto key = is_proxy ? "Proxy-Authorization" : "Authorization"; + return std::make_pair(key, std::move(field)); +} + +// Request implementation +inline bool Request::has_header(const std::string &key) const { + return detail::has_header(headers, key); +} + +inline std::string Request::get_header_value(const std::string &key, + size_t id) const { + return detail::get_header_value(headers, key, id, ""); +} + +inline size_t Request::get_header_value_count(const std::string &key) const { + auto r = headers.equal_range(key); + return static_cast(std::distance(r.first, r.second)); +} + +inline void Request::set_header(const std::string &key, + const std::string &val) { + if (!detail::has_crlf(key) && !detail::has_crlf(val)) { + headers.emplace(key, val); + } +} + +inline bool Request::has_param(const std::string &key) const { + return params.find(key) != params.end(); +} + +inline std::string Request::get_param_value(const std::string &key, + size_t id) const { + auto rng = params.equal_range(key); + auto it = rng.first; + std::advance(it, static_cast(id)); + if (it != rng.second) { return it->second; } + return std::string(); +} + +inline size_t Request::get_param_value_count(const std::string &key) const { + auto r = params.equal_range(key); + return static_cast(std::distance(r.first, r.second)); +} + +inline bool Request::is_multipart_form_data() const { + const auto &content_type = get_header_value("Content-Type"); + return !content_type.rfind("multipart/form-data", 0); +} + +inline bool Request::has_file(const std::string &key) const { + return files.find(key) != files.end(); +} + +inline MultipartFormData Request::get_file_value(const std::string &key) const { + auto it = files.find(key); + if (it != files.end()) { return it->second; } + return MultipartFormData(); +} + +inline std::vector +Request::get_file_values(const std::string &key) const { + std::vector values; + auto rng = files.equal_range(key); + for (auto it = rng.first; it != rng.second; it++) { + values.push_back(it->second); + } + return values; +} + +// Response implementation +inline bool Response::has_header(const std::string &key) const { + return headers.find(key) != headers.end(); +} + +inline std::string Response::get_header_value(const std::string &key, + size_t id) const { + return detail::get_header_value(headers, key, id, ""); +} + +inline size_t Response::get_header_value_count(const std::string &key) const { + auto r = headers.equal_range(key); + return static_cast(std::distance(r.first, r.second)); +} + +inline void Response::set_header(const std::string &key, + const std::string &val) { + if (!detail::has_crlf(key) && !detail::has_crlf(val)) { + headers.emplace(key, val); + } +} + +inline void Response::set_redirect(const std::string &url, int stat) { + if (!detail::has_crlf(url)) { + set_header("Location", url); + if (300 <= stat && stat < 400) { + this->status = stat; + } else { + this->status = 302; + } + } +} + +inline void Response::set_content(const char *s, size_t n, + const std::string &content_type) { + body.assign(s, n); + + auto rng = headers.equal_range("Content-Type"); + headers.erase(rng.first, rng.second); + set_header("Content-Type", content_type); +} + +inline void Response::set_content(const std::string &s, + const std::string &content_type) { + set_content(s.data(), s.size(), content_type); +} + +inline void Response::set_content_provider( + size_t in_length, const std::string &content_type, ContentProvider provider, + ContentProviderResourceReleaser resource_releaser) { + set_header("Content-Type", content_type); + content_length_ = in_length; + if (in_length > 0) { content_provider_ = std::move(provider); } + content_provider_resource_releaser_ = resource_releaser; + is_chunked_content_provider_ = false; +} + +inline void Response::set_content_provider( + const std::string &content_type, ContentProviderWithoutLength provider, + ContentProviderResourceReleaser resource_releaser) { + set_header("Content-Type", content_type); + content_length_ = 0; + content_provider_ = detail::ContentProviderAdapter(std::move(provider)); + content_provider_resource_releaser_ = resource_releaser; + is_chunked_content_provider_ = false; +} + +inline void Response::set_chunked_content_provider( + const std::string &content_type, ContentProviderWithoutLength provider, + ContentProviderResourceReleaser resource_releaser) { + set_header("Content-Type", content_type); + content_length_ = 0; + content_provider_ = detail::ContentProviderAdapter(std::move(provider)); + content_provider_resource_releaser_ = resource_releaser; + is_chunked_content_provider_ = true; +} + +// Result implementation +inline bool Result::has_request_header(const std::string &key) const { + return request_headers_.find(key) != request_headers_.end(); +} + +inline std::string Result::get_request_header_value(const std::string &key, + size_t id) const { + return detail::get_header_value(request_headers_, key, id, ""); +} + +inline size_t +Result::get_request_header_value_count(const std::string &key) const { + auto r = request_headers_.equal_range(key); + return static_cast(std::distance(r.first, r.second)); +} + +// Stream implementation +inline ssize_t Stream::write(const char *ptr) { + return write(ptr, strlen(ptr)); +} + +inline ssize_t Stream::write(const std::string &s) { + return write(s.data(), s.size()); +} + +namespace detail { + +// Socket stream implementation +inline SocketStream::SocketStream(socket_t sock, time_t read_timeout_sec, + time_t read_timeout_usec, + time_t write_timeout_sec, + time_t write_timeout_usec) + : sock_(sock), read_timeout_sec_(read_timeout_sec), + read_timeout_usec_(read_timeout_usec), + write_timeout_sec_(write_timeout_sec), + write_timeout_usec_(write_timeout_usec), read_buff_(read_buff_size_, 0) {} + +inline SocketStream::~SocketStream() {} + +inline bool SocketStream::is_readable() const { + return select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; +} + +inline bool SocketStream::is_writable() const { + return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 && + is_socket_alive(sock_); +} + +inline ssize_t SocketStream::read(char *ptr, size_t size) { +#ifdef _WIN32 + size = + (std::min)(size, static_cast((std::numeric_limits::max)())); +#else + size = (std::min)(size, + static_cast((std::numeric_limits::max)())); +#endif + + if (read_buff_off_ < read_buff_content_size_) { + auto remaining_size = read_buff_content_size_ - read_buff_off_; + if (size <= remaining_size) { + memcpy(ptr, read_buff_.data() + read_buff_off_, size); + read_buff_off_ += size; + return static_cast(size); + } else { + memcpy(ptr, read_buff_.data() + read_buff_off_, remaining_size); + read_buff_off_ += remaining_size; + return static_cast(remaining_size); + } + } + + if (!is_readable()) { return -1; } + + read_buff_off_ = 0; + read_buff_content_size_ = 0; + + if (size < read_buff_size_) { + auto n = read_socket(sock_, read_buff_.data(), read_buff_size_, + CPPHTTPLIB_RECV_FLAGS); + if (n <= 0) { + return n; + } else if (n <= static_cast(size)) { + memcpy(ptr, read_buff_.data(), static_cast(n)); + return n; + } else { + memcpy(ptr, read_buff_.data(), size); + read_buff_off_ = size; + read_buff_content_size_ = static_cast(n); + return static_cast(size); + } + } else { + return read_socket(sock_, ptr, size, CPPHTTPLIB_RECV_FLAGS); + } +} + +inline ssize_t SocketStream::write(const char *ptr, size_t size) { + if (!is_writable()) { return -1; } + +#if defined(_WIN32) && !defined(_WIN64) + size = + (std::min)(size, static_cast((std::numeric_limits::max)())); +#endif + + return send_socket(sock_, ptr, size, CPPHTTPLIB_SEND_FLAGS); +} + +inline void SocketStream::get_remote_ip_and_port(std::string &ip, + int &port) const { + return detail::get_remote_ip_and_port(sock_, ip, port); +} + +inline void SocketStream::get_local_ip_and_port(std::string &ip, + int &port) const { + return detail::get_local_ip_and_port(sock_, ip, port); +} + +inline socket_t SocketStream::socket() const { return sock_; } + +// Buffer stream implementation +inline bool BufferStream::is_readable() const { return true; } + +inline bool BufferStream::is_writable() const { return true; } + +inline ssize_t BufferStream::read(char *ptr, size_t size) { +#if defined(_MSC_VER) && _MSC_VER < 1910 + auto len_read = buffer._Copy_s(ptr, size, size, position); +#else + auto len_read = buffer.copy(ptr, size, position); +#endif + position += static_cast(len_read); + return static_cast(len_read); +} + +inline ssize_t BufferStream::write(const char *ptr, size_t size) { + buffer.append(ptr, size); + return static_cast(size); +} + +inline void BufferStream::get_remote_ip_and_port(std::string & /*ip*/, + int & /*port*/) const {} + +inline void BufferStream::get_local_ip_and_port(std::string & /*ip*/, + int & /*port*/) const {} + +inline socket_t BufferStream::socket() const { return 0; } + +inline const std::string &BufferStream::get_buffer() const { return buffer; } + +} // namespace detail + +// HTTP server implementation +inline Server::Server() + : new_task_queue( + [] { return new ThreadPool(CPPHTTPLIB_THREAD_POOL_COUNT); }) { +#ifndef _WIN32 + signal(SIGPIPE, SIG_IGN); +#endif +} + +inline Server::~Server() {} + +inline Server &Server::Get(const std::string &pattern, Handler handler) { + get_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Post(const std::string &pattern, Handler handler) { + post_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Post(const std::string &pattern, + HandlerWithContentReader handler) { + post_handlers_for_content_reader_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Put(const std::string &pattern, Handler handler) { + put_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Put(const std::string &pattern, + HandlerWithContentReader handler) { + put_handlers_for_content_reader_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Patch(const std::string &pattern, Handler handler) { + patch_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Patch(const std::string &pattern, + HandlerWithContentReader handler) { + patch_handlers_for_content_reader_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Delete(const std::string &pattern, Handler handler) { + delete_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Delete(const std::string &pattern, + HandlerWithContentReader handler) { + delete_handlers_for_content_reader_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline Server &Server::Options(const std::string &pattern, Handler handler) { + options_handlers_.push_back( + std::make_pair(std::regex(pattern), std::move(handler))); + return *this; +} + +inline bool Server::set_base_dir(const std::string &dir, + const std::string &mount_point) { + return set_mount_point(mount_point, dir); +} + +inline bool Server::set_mount_point(const std::string &mount_point, + const std::string &dir, Headers headers) { + if (detail::is_dir(dir)) { + std::string mnt = !mount_point.empty() ? mount_point : "/"; + if (!mnt.empty() && mnt[0] == '/') { + base_dirs_.push_back({mnt, dir, std::move(headers)}); + return true; + } + } + return false; +} + +inline bool Server::remove_mount_point(const std::string &mount_point) { + for (auto it = base_dirs_.begin(); it != base_dirs_.end(); ++it) { + if (it->mount_point == mount_point) { + base_dirs_.erase(it); + return true; + } + } + return false; +} + +inline Server & +Server::set_file_extension_and_mimetype_mapping(const std::string &ext, + const std::string &mime) { + file_extension_and_mimetype_map_[ext] = mime; + return *this; +} + +inline Server &Server::set_file_request_handler(Handler handler) { + file_request_handler_ = std::move(handler); + return *this; +} + +inline Server &Server::set_error_handler(HandlerWithResponse handler) { + error_handler_ = std::move(handler); + return *this; +} + +inline Server &Server::set_error_handler(Handler handler) { + error_handler_ = [handler](const Request &req, Response &res) { + handler(req, res); + return HandlerResponse::Handled; + }; + return *this; +} + +inline Server &Server::set_exception_handler(ExceptionHandler handler) { + exception_handler_ = std::move(handler); + return *this; +} + +inline Server &Server::set_pre_routing_handler(HandlerWithResponse handler) { + pre_routing_handler_ = std::move(handler); + return *this; +} + +inline Server &Server::set_post_routing_handler(Handler handler) { + post_routing_handler_ = std::move(handler); + return *this; +} + +inline Server &Server::set_logger(Logger logger) { + logger_ = std::move(logger); + return *this; +} + +inline Server & +Server::set_expect_100_continue_handler(Expect100ContinueHandler handler) { + expect_100_continue_handler_ = std::move(handler); + + return *this; +} + +inline Server &Server::set_address_family(int family) { + address_family_ = family; + return *this; +} + +inline Server &Server::set_tcp_nodelay(bool on) { + tcp_nodelay_ = on; + return *this; +} + +inline Server &Server::set_socket_options(SocketOptions socket_options) { + socket_options_ = std::move(socket_options); + return *this; +} + +inline Server &Server::set_default_headers(Headers headers) { + default_headers_ = std::move(headers); + return *this; +} + +inline Server &Server::set_keep_alive_max_count(size_t count) { + keep_alive_max_count_ = count; + return *this; +} + +inline Server &Server::set_keep_alive_timeout(time_t sec) { + keep_alive_timeout_sec_ = sec; + return *this; +} + +inline Server &Server::set_read_timeout(time_t sec, time_t usec) { + read_timeout_sec_ = sec; + read_timeout_usec_ = usec; + return *this; +} + +inline Server &Server::set_write_timeout(time_t sec, time_t usec) { + write_timeout_sec_ = sec; + write_timeout_usec_ = usec; + return *this; +} + +inline Server &Server::set_idle_interval(time_t sec, time_t usec) { + idle_interval_sec_ = sec; + idle_interval_usec_ = usec; + return *this; +} + +inline Server &Server::set_payload_max_length(size_t length) { + payload_max_length_ = length; + return *this; +} + +inline bool Server::bind_to_port(const std::string &host, int port, + int socket_flags) { + if (bind_internal(host, port, socket_flags) < 0) return false; + return true; +} +inline int Server::bind_to_any_port(const std::string &host, int socket_flags) { + return bind_internal(host, 0, socket_flags); +} + +inline bool Server::listen_after_bind() { + auto se = detail::scope_exit([&]() { done_ = true; }); + return listen_internal(); +} + +inline bool Server::listen(const std::string &host, int port, + int socket_flags) { + auto se = detail::scope_exit([&]() { done_ = true; }); + return bind_to_port(host, port, socket_flags) && listen_internal(); +} + +inline bool Server::is_running() const { return is_running_; } + +inline void Server::wait_until_ready() const { + while (!is_running() && !done_) { + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + } +} + +inline void Server::stop() { + if (is_running_) { + assert(svr_sock_ != INVALID_SOCKET); + std::atomic sock(svr_sock_.exchange(INVALID_SOCKET)); + detail::shutdown_socket(sock); + detail::close_socket(sock); + } +} + +inline bool Server::parse_request_line(const char *s, Request &req) { + auto len = strlen(s); + if (len < 2 || s[len - 2] != '\r' || s[len - 1] != '\n') { return false; } + len -= 2; + + { + size_t count = 0; + + detail::split(s, s + len, ' ', [&](const char *b, const char *e) { + switch (count) { + case 0: req.method = std::string(b, e); break; + case 1: req.target = std::string(b, e); break; + case 2: req.version = std::string(b, e); break; + default: break; + } + count++; + }); + + if (count != 3) { return false; } + } + + static const std::set methods{ + "GET", "HEAD", "POST", "PUT", "DELETE", + "CONNECT", "OPTIONS", "TRACE", "PATCH", "PRI"}; + + if (methods.find(req.method) == methods.end()) { return false; } + + if (req.version != "HTTP/1.1" && req.version != "HTTP/1.0") { return false; } + + { + // Skip URL fragment + for (size_t i = 0; i < req.target.size(); i++) { + if (req.target[i] == '#') { + req.target.erase(i); + break; + } + } + + size_t count = 0; + + detail::split(req.target.data(), req.target.data() + req.target.size(), '?', + [&](const char *b, const char *e) { + switch (count) { + case 0: + req.path = detail::decode_url(std::string(b, e), false); + break; + case 1: { + if (e - b > 0) { + detail::parse_query_text(std::string(b, e), req.params); + } + break; + } + default: break; + } + count++; + }); + + if (count > 2) { return false; } + } + + return true; +} + +inline bool Server::write_response(Stream &strm, bool close_connection, + const Request &req, Response &res) { + return write_response_core(strm, close_connection, req, res, false); +} + +inline bool Server::write_response_with_content(Stream &strm, + bool close_connection, + const Request &req, + Response &res) { + return write_response_core(strm, close_connection, req, res, true); +} + +inline bool Server::write_response_core(Stream &strm, bool close_connection, + const Request &req, Response &res, + bool need_apply_ranges) { + assert(res.status != -1); + + if (400 <= res.status && error_handler_ && + error_handler_(req, res) == HandlerResponse::Handled) { + need_apply_ranges = true; + } + + std::string content_type; + std::string boundary; + if (need_apply_ranges) { apply_ranges(req, res, content_type, boundary); } + + // Prepare additional headers + if (close_connection || req.get_header_value("Connection") == "close") { + res.set_header("Connection", "close"); + } else { + std::stringstream ss; + ss << "timeout=" << keep_alive_timeout_sec_ + << ", max=" << keep_alive_max_count_; + res.set_header("Keep-Alive", ss.str()); + } + + if (!res.has_header("Content-Type") && + (!res.body.empty() || res.content_length_ > 0 || res.content_provider_)) { + res.set_header("Content-Type", "text/plain"); + } + + if (!res.has_header("Content-Length") && res.body.empty() && + !res.content_length_ && !res.content_provider_) { + res.set_header("Content-Length", "0"); + } + + if (!res.has_header("Accept-Ranges") && req.method == "HEAD") { + res.set_header("Accept-Ranges", "bytes"); + } + + if (post_routing_handler_) { post_routing_handler_(req, res); } + + // Response line and headers + { + detail::BufferStream bstrm; + + if (!bstrm.write_format("HTTP/1.1 %d %s\r\n", res.status, + detail::status_message(res.status))) { + return false; + } + + if (!detail::write_headers(bstrm, res.headers)) { return false; } + + // Flush buffer + auto &data = bstrm.get_buffer(); + detail::write_data(strm, data.data(), data.size()); + } + + // Body + auto ret = true; + if (req.method != "HEAD") { + if (!res.body.empty()) { + if (!detail::write_data(strm, res.body.data(), res.body.size())) { + ret = false; + } + } else if (res.content_provider_) { + if (write_content_with_provider(strm, req, res, boundary, content_type)) { + res.content_provider_success_ = true; + } else { + res.content_provider_success_ = false; + ret = false; + } + } + } + + // Log + if (logger_) { logger_(req, res); } + + return ret; +} + +inline bool +Server::write_content_with_provider(Stream &strm, const Request &req, + Response &res, const std::string &boundary, + const std::string &content_type) { + auto is_shutting_down = [this]() { + return this->svr_sock_ == INVALID_SOCKET; + }; + + if (res.content_length_ > 0) { + if (req.ranges.empty()) { + return detail::write_content(strm, res.content_provider_, 0, + res.content_length_, is_shutting_down); + } else if (req.ranges.size() == 1) { + auto offsets = + detail::get_range_offset_and_length(req, res.content_length_, 0); + auto offset = offsets.first; + auto length = offsets.second; + return detail::write_content(strm, res.content_provider_, offset, length, + is_shutting_down); + } else { + return detail::write_multipart_ranges_data( + strm, req, res, boundary, content_type, is_shutting_down); + } + } else { + if (res.is_chunked_content_provider_) { + auto type = detail::encoding_type(req, res); + + std::unique_ptr compressor; + if (type == detail::EncodingType::Gzip) { +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + compressor = detail::make_unique(); +#endif + } else if (type == detail::EncodingType::Brotli) { +#ifdef CPPHTTPLIB_BROTLI_SUPPORT + compressor = detail::make_unique(); +#endif + } else { + compressor = detail::make_unique(); + } + assert(compressor != nullptr); + + return detail::write_content_chunked(strm, res.content_provider_, + is_shutting_down, *compressor); + } else { + return detail::write_content_without_length(strm, res.content_provider_, + is_shutting_down); + } + } +} + +inline bool Server::read_content(Stream &strm, Request &req, Response &res) { + MultipartFormDataMap::iterator cur; + auto file_count = 0; + if (read_content_core( + strm, req, res, + // Regular + [&](const char *buf, size_t n) { + if (req.body.size() + n > req.body.max_size()) { return false; } + req.body.append(buf, n); + return true; + }, + // Multipart + [&](const MultipartFormData &file) { + if (file_count++ == CPPHTTPLIB_MULTIPART_FORM_DATA_FILE_MAX_COUNT) { + return false; + } + cur = req.files.emplace(file.name, file); + return true; + }, + [&](const char *buf, size_t n) { + auto &content = cur->second.content; + if (content.size() + n > content.max_size()) { return false; } + content.append(buf, n); + return true; + })) { + const auto &content_type = req.get_header_value("Content-Type"); + if (!content_type.find("application/x-www-form-urlencoded")) { + if (req.body.size() > CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH) { + res.status = 413; // NOTE: should be 414? + return false; + } + detail::parse_query_text(req.body, req.params); + } + return true; + } + return false; +} + +inline bool Server::read_content_with_content_receiver( + Stream &strm, Request &req, Response &res, ContentReceiver receiver, + MultipartContentHeader multipart_header, + ContentReceiver multipart_receiver) { + return read_content_core(strm, req, res, std::move(receiver), + std::move(multipart_header), + std::move(multipart_receiver)); +} + +inline bool Server::read_content_core(Stream &strm, Request &req, Response &res, + ContentReceiver receiver, + MultipartContentHeader multipart_header, + ContentReceiver multipart_receiver) { + detail::MultipartFormDataParser multipart_form_data_parser; + ContentReceiverWithProgress out; + + if (req.is_multipart_form_data()) { + const auto &content_type = req.get_header_value("Content-Type"); + std::string boundary; + if (!detail::parse_multipart_boundary(content_type, boundary)) { + res.status = 400; + return false; + } + + multipart_form_data_parser.set_boundary(std::move(boundary)); + out = [&](const char *buf, size_t n, uint64_t /*off*/, uint64_t /*len*/) { + /* For debug + size_t pos = 0; + while (pos < n) { + auto read_size = (std::min)(1, n - pos); + auto ret = multipart_form_data_parser.parse( + buf + pos, read_size, multipart_receiver, multipart_header); + if (!ret) { return false; } + pos += read_size; + } + return true; + */ + return multipart_form_data_parser.parse(buf, n, multipart_receiver, + multipart_header); + }; + } else { + out = [receiver](const char *buf, size_t n, uint64_t /*off*/, + uint64_t /*len*/) { return receiver(buf, n); }; + } + + if (req.method == "DELETE" && !req.has_header("Content-Length")) { + return true; + } + + if (!detail::read_content(strm, req, payload_max_length_, res.status, nullptr, + out, true)) { + return false; + } + + if (req.is_multipart_form_data()) { + if (!multipart_form_data_parser.is_valid()) { + res.status = 400; + return false; + } + } + + return true; +} + +inline bool Server::handle_file_request(const Request &req, Response &res, + bool head) { + for (const auto &entry : base_dirs_) { + // Prefix match + if (!req.path.compare(0, entry.mount_point.size(), entry.mount_point)) { + std::string sub_path = "/" + req.path.substr(entry.mount_point.size()); + if (detail::is_valid_path(sub_path)) { + auto path = entry.base_dir + sub_path; + if (path.back() == '/') { path += "index.html"; } + + if (detail::is_file(path)) { + detail::read_file(path, res.body); + auto type = + detail::find_content_type(path, file_extension_and_mimetype_map_); + if (type) { res.set_header("Content-Type", type); } + for (const auto &kv : entry.headers) { + res.set_header(kv.first.c_str(), kv.second); + } + res.status = req.has_header("Range") ? 206 : 200; + if (!head && file_request_handler_) { + file_request_handler_(req, res); + } + return true; + } + } + } + } + return false; +} + +inline socket_t +Server::create_server_socket(const std::string &host, int port, + int socket_flags, + SocketOptions socket_options) const { + return detail::create_socket( + host, std::string(), port, address_family_, socket_flags, tcp_nodelay_, + std::move(socket_options), + [](socket_t sock, struct addrinfo &ai) -> bool { + if (::bind(sock, ai.ai_addr, static_cast(ai.ai_addrlen))) { + return false; + } + if (::listen(sock, CPPHTTPLIB_LISTEN_BACKLOG)) { return false; } + return true; + }); +} + +inline int Server::bind_internal(const std::string &host, int port, + int socket_flags) { + if (!is_valid()) { return -1; } + + svr_sock_ = create_server_socket(host, port, socket_flags, socket_options_); + if (svr_sock_ == INVALID_SOCKET) { return -1; } + + if (port == 0) { + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + if (getsockname(svr_sock_, reinterpret_cast(&addr), + &addr_len) == -1) { + return -1; + } + if (addr.ss_family == AF_INET) { + return ntohs(reinterpret_cast(&addr)->sin_port); + } else if (addr.ss_family == AF_INET6) { + return ntohs(reinterpret_cast(&addr)->sin6_port); + } else { + return -1; + } + } else { + return port; + } +} + +inline bool Server::listen_internal() { + auto ret = true; + is_running_ = true; + auto se = detail::scope_exit([&]() { is_running_ = false; }); + + { + std::unique_ptr task_queue(new_task_queue()); + + while (svr_sock_ != INVALID_SOCKET) { +#ifndef _WIN32 + if (idle_interval_sec_ > 0 || idle_interval_usec_ > 0) { +#endif + auto val = detail::select_read(svr_sock_, idle_interval_sec_, + idle_interval_usec_); + if (val == 0) { // Timeout + task_queue->on_idle(); + continue; + } +#ifndef _WIN32 + } +#endif + socket_t sock = accept(svr_sock_, nullptr, nullptr); + + if (sock == INVALID_SOCKET) { + if (errno == EMFILE) { + // The per-process limit of open file descriptors has been reached. + // Try to accept new connections after a short sleep. + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } else if (errno == EINTR || errno == EAGAIN) { + continue; + } + if (svr_sock_ != INVALID_SOCKET) { + detail::close_socket(svr_sock_); + ret = false; + } else { + ; // The server socket was closed by user. + } + break; + } + + { +#ifdef _WIN32 + auto timeout = static_cast(read_timeout_sec_ * 1000 + + read_timeout_usec_ / 1000); + setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)); +#else + timeval tv; + tv.tv_sec = static_cast(read_timeout_sec_); + tv.tv_usec = static_cast(read_timeout_usec_); + setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)); +#endif + } + { + +#ifdef _WIN32 + auto timeout = static_cast(write_timeout_sec_ * 1000 + + write_timeout_usec_ / 1000); + setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, + sizeof(timeout)); +#else + timeval tv; + tv.tv_sec = static_cast(write_timeout_sec_); + tv.tv_usec = static_cast(write_timeout_usec_); + setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, sizeof(tv)); +#endif + } + + task_queue->enqueue([this, sock]() { process_and_close_socket(sock); }); + } + + task_queue->shutdown(); + } + + return ret; +} + +inline bool Server::routing(Request &req, Response &res, Stream &strm) { + if (pre_routing_handler_ && + pre_routing_handler_(req, res) == HandlerResponse::Handled) { + return true; + } + + // File handler + bool is_head_request = req.method == "HEAD"; + if ((req.method == "GET" || is_head_request) && + handle_file_request(req, res, is_head_request)) { + return true; + } + + if (detail::expect_content(req)) { + // Content reader handler + { + ContentReader reader( + [&](ContentReceiver receiver) { + return read_content_with_content_receiver( + strm, req, res, std::move(receiver), nullptr, nullptr); + }, + [&](MultipartContentHeader header, ContentReceiver receiver) { + return read_content_with_content_receiver(strm, req, res, nullptr, + std::move(header), + std::move(receiver)); + }); + + if (req.method == "POST") { + if (dispatch_request_for_content_reader( + req, res, std::move(reader), + post_handlers_for_content_reader_)) { + return true; + } + } else if (req.method == "PUT") { + if (dispatch_request_for_content_reader( + req, res, std::move(reader), + put_handlers_for_content_reader_)) { + return true; + } + } else if (req.method == "PATCH") { + if (dispatch_request_for_content_reader( + req, res, std::move(reader), + patch_handlers_for_content_reader_)) { + return true; + } + } else if (req.method == "DELETE") { + if (dispatch_request_for_content_reader( + req, res, std::move(reader), + delete_handlers_for_content_reader_)) { + return true; + } + } + } + + // Read content into `req.body` + if (!read_content(strm, req, res)) { return false; } + } + + // Regular handler + if (req.method == "GET" || req.method == "HEAD") { + return dispatch_request(req, res, get_handlers_); + } else if (req.method == "POST") { + return dispatch_request(req, res, post_handlers_); + } else if (req.method == "PUT") { + return dispatch_request(req, res, put_handlers_); + } else if (req.method == "DELETE") { + return dispatch_request(req, res, delete_handlers_); + } else if (req.method == "OPTIONS") { + return dispatch_request(req, res, options_handlers_); + } else if (req.method == "PATCH") { + return dispatch_request(req, res, patch_handlers_); + } + + res.status = 400; + return false; +} + +inline bool Server::dispatch_request(Request &req, Response &res, + const Handlers &handlers) { + for (const auto &x : handlers) { + const auto &pattern = x.first; + const auto &handler = x.second; + + if (std::regex_match(req.path, req.matches, pattern)) { + handler(req, res); + return true; + } + } + return false; +} + +inline void Server::apply_ranges(const Request &req, Response &res, + std::string &content_type, + std::string &boundary) { + if (req.ranges.size() > 1) { + boundary = detail::make_multipart_data_boundary(); + + auto it = res.headers.find("Content-Type"); + if (it != res.headers.end()) { + content_type = it->second; + res.headers.erase(it); + } + + res.headers.emplace("Content-Type", + "multipart/byteranges; boundary=" + boundary); + } + + auto type = detail::encoding_type(req, res); + + if (res.body.empty()) { + if (res.content_length_ > 0) { + size_t length = 0; + if (req.ranges.empty()) { + length = res.content_length_; + } else if (req.ranges.size() == 1) { + auto offsets = + detail::get_range_offset_and_length(req, res.content_length_, 0); + auto offset = offsets.first; + length = offsets.second; + auto content_range = detail::make_content_range_header_field( + offset, length, res.content_length_); + res.set_header("Content-Range", content_range); + } else { + length = detail::get_multipart_ranges_data_length(req, res, boundary, + content_type); + } + res.set_header("Content-Length", std::to_string(length)); + } else { + if (res.content_provider_) { + if (res.is_chunked_content_provider_) { + res.set_header("Transfer-Encoding", "chunked"); + if (type == detail::EncodingType::Gzip) { + res.set_header("Content-Encoding", "gzip"); + } else if (type == detail::EncodingType::Brotli) { + res.set_header("Content-Encoding", "br"); + } + } + } + } + } else { + if (req.ranges.empty()) { + ; + } else if (req.ranges.size() == 1) { + auto offsets = + detail::get_range_offset_and_length(req, res.body.size(), 0); + auto offset = offsets.first; + auto length = offsets.second; + auto content_range = detail::make_content_range_header_field( + offset, length, res.body.size()); + res.set_header("Content-Range", content_range); + if (offset < res.body.size()) { + res.body = res.body.substr(offset, length); + } else { + res.body.clear(); + res.status = 416; + } + } else { + std::string data; + if (detail::make_multipart_ranges_data(req, res, boundary, content_type, + data)) { + res.body.swap(data); + } else { + res.body.clear(); + res.status = 416; + } + } + + if (type != detail::EncodingType::None) { + std::unique_ptr compressor; + std::string content_encoding; + + if (type == detail::EncodingType::Gzip) { +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + compressor = detail::make_unique(); + content_encoding = "gzip"; +#endif + } else if (type == detail::EncodingType::Brotli) { +#ifdef CPPHTTPLIB_BROTLI_SUPPORT + compressor = detail::make_unique(); + content_encoding = "br"; +#endif + } + + if (compressor) { + std::string compressed; + if (compressor->compress(res.body.data(), res.body.size(), true, + [&](const char *data, size_t data_len) { + compressed.append(data, data_len); + return true; + })) { + res.body.swap(compressed); + res.set_header("Content-Encoding", content_encoding); + } + } + } + + auto length = std::to_string(res.body.size()); + res.set_header("Content-Length", length); + } +} + +inline bool Server::dispatch_request_for_content_reader( + Request &req, Response &res, ContentReader content_reader, + const HandlersForContentReader &handlers) { + for (const auto &x : handlers) { + const auto &pattern = x.first; + const auto &handler = x.second; + + if (std::regex_match(req.path, req.matches, pattern)) { + handler(req, res, content_reader); + return true; + } + } + return false; +} + +inline bool +Server::process_request(Stream &strm, bool close_connection, + bool &connection_closed, + const std::function &setup_request) { + std::array buf{}; + + detail::stream_line_reader line_reader(strm, buf.data(), buf.size()); + + // Connection has been closed on client + if (!line_reader.getline()) { return false; } + + Request req; + Response res; + + res.version = "HTTP/1.1"; + + for (const auto &header : default_headers_) { + if (res.headers.find(header.first) == res.headers.end()) { + res.headers.insert(header); + } + } + +#ifdef _WIN32 + // TODO: Increase FD_SETSIZE statically (libzmq), dynamically (MySQL). +#else +#ifndef CPPHTTPLIB_USE_POLL + // Socket file descriptor exceeded FD_SETSIZE... + if (strm.socket() >= FD_SETSIZE) { + Headers dummy; + detail::read_headers(strm, dummy); + res.status = 500; + return write_response(strm, close_connection, req, res); + } +#endif +#endif + + // Check if the request URI doesn't exceed the limit + if (line_reader.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) { + Headers dummy; + detail::read_headers(strm, dummy); + res.status = 414; + return write_response(strm, close_connection, req, res); + } + + // Request line and headers + if (!parse_request_line(line_reader.ptr(), req) || + !detail::read_headers(strm, req.headers)) { + res.status = 400; + return write_response(strm, close_connection, req, res); + } + + if (req.get_header_value("Connection") == "close") { + connection_closed = true; + } + + if (req.version == "HTTP/1.0" && + req.get_header_value("Connection") != "Keep-Alive") { + connection_closed = true; + } + + strm.get_remote_ip_and_port(req.remote_addr, req.remote_port); + req.set_header("REMOTE_ADDR", req.remote_addr); + req.set_header("REMOTE_PORT", std::to_string(req.remote_port)); + + strm.get_local_ip_and_port(req.local_addr, req.local_port); + req.set_header("LOCAL_ADDR", req.local_addr); + req.set_header("LOCAL_PORT", std::to_string(req.local_port)); + + if (req.has_header("Range")) { + const auto &range_header_value = req.get_header_value("Range"); + if (!detail::parse_range_header(range_header_value, req.ranges)) { + res.status = 416; + return write_response(strm, close_connection, req, res); + } + } + + if (setup_request) { setup_request(req); } + + if (req.get_header_value("Expect") == "100-continue") { + auto status = 100; + if (expect_100_continue_handler_) { + status = expect_100_continue_handler_(req, res); + } + switch (status) { + case 100: + case 417: + strm.write_format("HTTP/1.1 %d %s\r\n\r\n", status, + detail::status_message(status)); + break; + default: return write_response(strm, close_connection, req, res); + } + } + + // Rounting + bool routed = false; +#ifdef CPPHTTPLIB_NO_EXCEPTIONS + routed = routing(req, res, strm); +#else + try { + routed = routing(req, res, strm); + } catch (std::exception &e) { + if (exception_handler_) { + auto ep = std::current_exception(); + exception_handler_(req, res, ep); + routed = true; + } else { + res.status = 500; + std::string val; + auto s = e.what(); + for (size_t i = 0; s[i]; i++) { + switch (s[i]) { + case '\r': val += "\\r"; break; + case '\n': val += "\\n"; break; + default: val += s[i]; break; + } + } + res.set_header("EXCEPTION_WHAT", val); + } + } catch (...) { + if (exception_handler_) { + auto ep = std::current_exception(); + exception_handler_(req, res, ep); + routed = true; + } else { + res.status = 500; + res.set_header("EXCEPTION_WHAT", "UNKNOWN"); + } + } +#endif + + if (routed) { + if (res.status == -1) { res.status = req.ranges.empty() ? 200 : 206; } + return write_response_with_content(strm, close_connection, req, res); + } else { + if (res.status == -1) { res.status = 404; } + return write_response(strm, close_connection, req, res); + } +} + +inline bool Server::is_valid() const { return true; } + +inline bool Server::process_and_close_socket(socket_t sock) { + auto ret = detail::process_server_socket( + svr_sock_, sock, keep_alive_max_count_, keep_alive_timeout_sec_, + read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, + write_timeout_usec_, + [this](Stream &strm, bool close_connection, bool &connection_closed) { + return process_request(strm, close_connection, connection_closed, + nullptr); + }); + + detail::shutdown_socket(sock); + detail::close_socket(sock); + return ret; +} + +// HTTP client implementation +inline ClientImpl::ClientImpl(const std::string &host) + : ClientImpl(host, 80, std::string(), std::string()) {} + +inline ClientImpl::ClientImpl(const std::string &host, int port) + : ClientImpl(host, port, std::string(), std::string()) {} + +inline ClientImpl::ClientImpl(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path) + : host_(host), port_(port), + host_and_port_(adjust_host_string(host) + ":" + std::to_string(port)), + client_cert_path_(client_cert_path), client_key_path_(client_key_path) {} + +inline ClientImpl::~ClientImpl() { + std::lock_guard guard(socket_mutex_); + shutdown_socket(socket_); + close_socket(socket_); +} + +inline bool ClientImpl::is_valid() const { return true; } + +inline void ClientImpl::copy_settings(const ClientImpl &rhs) { + client_cert_path_ = rhs.client_cert_path_; + client_key_path_ = rhs.client_key_path_; + connection_timeout_sec_ = rhs.connection_timeout_sec_; + read_timeout_sec_ = rhs.read_timeout_sec_; + read_timeout_usec_ = rhs.read_timeout_usec_; + write_timeout_sec_ = rhs.write_timeout_sec_; + write_timeout_usec_ = rhs.write_timeout_usec_; + basic_auth_username_ = rhs.basic_auth_username_; + basic_auth_password_ = rhs.basic_auth_password_; + bearer_token_auth_token_ = rhs.bearer_token_auth_token_; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + digest_auth_username_ = rhs.digest_auth_username_; + digest_auth_password_ = rhs.digest_auth_password_; +#endif + keep_alive_ = rhs.keep_alive_; + follow_location_ = rhs.follow_location_; + url_encode_ = rhs.url_encode_; + address_family_ = rhs.address_family_; + tcp_nodelay_ = rhs.tcp_nodelay_; + socket_options_ = rhs.socket_options_; + compress_ = rhs.compress_; + decompress_ = rhs.decompress_; + interface_ = rhs.interface_; + proxy_host_ = rhs.proxy_host_; + proxy_port_ = rhs.proxy_port_; + proxy_basic_auth_username_ = rhs.proxy_basic_auth_username_; + proxy_basic_auth_password_ = rhs.proxy_basic_auth_password_; + proxy_bearer_token_auth_token_ = rhs.proxy_bearer_token_auth_token_; +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + proxy_digest_auth_username_ = rhs.proxy_digest_auth_username_; + proxy_digest_auth_password_ = rhs.proxy_digest_auth_password_; +#endif +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + ca_cert_file_path_ = rhs.ca_cert_file_path_; + ca_cert_dir_path_ = rhs.ca_cert_dir_path_; + ca_cert_store_ = rhs.ca_cert_store_; +#endif +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + server_certificate_verification_ = rhs.server_certificate_verification_; +#endif + logger_ = rhs.logger_; +} + +inline socket_t ClientImpl::create_client_socket(Error &error) const { + if (!proxy_host_.empty() && proxy_port_ != -1) { + return detail::create_client_socket( + proxy_host_, std::string(), proxy_port_, address_family_, tcp_nodelay_, + socket_options_, connection_timeout_sec_, connection_timeout_usec_, + read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, + write_timeout_usec_, interface_, error); + } + + // Check is custom IP specified for host_ + std::string ip; + auto it = addr_map_.find(host_); + if (it != addr_map_.end()) ip = it->second; + + return detail::create_client_socket( + host_, ip, port_, address_family_, tcp_nodelay_, socket_options_, + connection_timeout_sec_, connection_timeout_usec_, read_timeout_sec_, + read_timeout_usec_, write_timeout_sec_, write_timeout_usec_, interface_, + error); +} + +inline bool ClientImpl::create_and_connect_socket(Socket &socket, + Error &error) { + auto sock = create_client_socket(error); + if (sock == INVALID_SOCKET) { return false; } + socket.sock = sock; + return true; +} + +inline void ClientImpl::shutdown_ssl(Socket & /*socket*/, + bool /*shutdown_gracefully*/) { + // If there are any requests in flight from threads other than us, then it's + // a thread-unsafe race because individual ssl* objects are not thread-safe. + assert(socket_requests_in_flight_ == 0 || + socket_requests_are_from_thread_ == std::this_thread::get_id()); +} + +inline void ClientImpl::shutdown_socket(Socket &socket) { + if (socket.sock == INVALID_SOCKET) { return; } + detail::shutdown_socket(socket.sock); +} + +inline void ClientImpl::close_socket(Socket &socket) { + // If there are requests in flight in another thread, usually closing + // the socket will be fine and they will simply receive an error when + // using the closed socket, but it is still a bug since rarely the OS + // may reassign the socket id to be used for a new socket, and then + // suddenly they will be operating on a live socket that is different + // than the one they intended! + assert(socket_requests_in_flight_ == 0 || + socket_requests_are_from_thread_ == std::this_thread::get_id()); + + // It is also a bug if this happens while SSL is still active +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + assert(socket.ssl == nullptr); +#endif + if (socket.sock == INVALID_SOCKET) { return; } + detail::close_socket(socket.sock); + socket.sock = INVALID_SOCKET; +} + +inline bool ClientImpl::read_response_line(Stream &strm, const Request &req, + Response &res) { + std::array buf{}; + + detail::stream_line_reader line_reader(strm, buf.data(), buf.size()); + + if (!line_reader.getline()) { return false; } + +#ifdef CPPHTTPLIB_ALLOW_LF_AS_LINE_TERMINATOR + const static std::regex re("(HTTP/1\\.[01]) (\\d{3})(?: (.*?))?\r\n"); +#else + const static std::regex re("(HTTP/1\\.[01]) (\\d{3})(?: (.*?))?\r?\n"); +#endif + + std::cmatch m; + if (!std::regex_match(line_reader.ptr(), m, re)) { + return req.method == "CONNECT"; + } + res.version = std::string(m[1]); + res.status = std::stoi(std::string(m[2])); + res.reason = std::string(m[3]); + + // Ignore '100 Continue' + while (res.status == 100) { + if (!line_reader.getline()) { return false; } // CRLF + if (!line_reader.getline()) { return false; } // next response line + + if (!std::regex_match(line_reader.ptr(), m, re)) { return false; } + res.version = std::string(m[1]); + res.status = std::stoi(std::string(m[2])); + res.reason = std::string(m[3]); + } + + return true; +} + +inline bool ClientImpl::send(Request &req, Response &res, Error &error) { + std::lock_guard request_mutex_guard(request_mutex_); + auto ret = send_(req, res, error); + if (error == Error::SSLPeerCouldBeClosed_) { + assert(!ret); + ret = send_(req, res, error); + } + return ret; +} + +inline bool ClientImpl::send_(Request &req, Response &res, Error &error) { + { + std::lock_guard guard(socket_mutex_); + + // Set this to false immediately - if it ever gets set to true by the end of + // the request, we know another thread instructed us to close the socket. + socket_should_be_closed_when_request_is_done_ = false; + + auto is_alive = false; + if (socket_.is_open()) { + is_alive = detail::is_socket_alive(socket_.sock); + if (!is_alive) { + // Attempt to avoid sigpipe by shutting down nongracefully if it seems + // like the other side has already closed the connection Also, there + // cannot be any requests in flight from other threads since we locked + // request_mutex_, so safe to close everything immediately + const bool shutdown_gracefully = false; + shutdown_ssl(socket_, shutdown_gracefully); + shutdown_socket(socket_); + close_socket(socket_); + } + } + + if (!is_alive) { + if (!create_and_connect_socket(socket_, error)) { return false; } + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + // TODO: refactoring + if (is_ssl()) { + auto &scli = static_cast(*this); + if (!proxy_host_.empty() && proxy_port_ != -1) { + auto success = false; + if (!scli.connect_with_proxy(socket_, res, success, error)) { + return success; + } + } + + if (!scli.initialize_ssl(socket_, error)) { return false; } + } +#endif + } + + // Mark the current socket as being in use so that it cannot be closed by + // anyone else while this request is ongoing, even though we will be + // releasing the mutex. + if (socket_requests_in_flight_ > 1) { + assert(socket_requests_are_from_thread_ == std::this_thread::get_id()); + } + socket_requests_in_flight_ += 1; + socket_requests_are_from_thread_ = std::this_thread::get_id(); + } + + for (const auto &header : default_headers_) { + if (req.headers.find(header.first) == req.headers.end()) { + req.headers.insert(header); + } + } + + auto ret = false; + auto close_connection = !keep_alive_; + + auto se = detail::scope_exit([&]() { + // Briefly lock mutex in order to mark that a request is no longer ongoing + std::lock_guard guard(socket_mutex_); + socket_requests_in_flight_ -= 1; + if (socket_requests_in_flight_ <= 0) { + assert(socket_requests_in_flight_ == 0); + socket_requests_are_from_thread_ = std::thread::id(); + } + + if (socket_should_be_closed_when_request_is_done_ || close_connection || + !ret) { + shutdown_ssl(socket_, true); + shutdown_socket(socket_); + close_socket(socket_); + } + }); + + ret = process_socket(socket_, [&](Stream &strm) { + return handle_request(strm, req, res, close_connection, error); + }); + + if (!ret) { + if (error == Error::Success) { error = Error::Unknown; } + } + + return ret; +} + +inline Result ClientImpl::send(const Request &req) { + auto req2 = req; + return send_(std::move(req2)); +} + +inline Result ClientImpl::send_(Request &&req) { + auto res = detail::make_unique(); + auto error = Error::Success; + auto ret = send(req, *res, error); + return Result{ret ? std::move(res) : nullptr, error, std::move(req.headers)}; +} + +inline bool ClientImpl::handle_request(Stream &strm, Request &req, + Response &res, bool close_connection, + Error &error) { + if (req.path.empty()) { + error = Error::Connection; + return false; + } + + auto req_save = req; + + bool ret; + + if (!is_ssl() && !proxy_host_.empty() && proxy_port_ != -1) { + auto req2 = req; + req2.path = "http://" + host_and_port_ + req.path; + ret = process_request(strm, req2, res, close_connection, error); + req = req2; + req.path = req_save.path; + } else { + ret = process_request(strm, req, res, close_connection, error); + } + + if (!ret) { return false; } + + if (300 < res.status && res.status < 400 && follow_location_) { + req = req_save; + ret = redirect(req, res, error); + } + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + if ((res.status == 401 || res.status == 407) && + req.authorization_count_ < 5) { + auto is_proxy = res.status == 407; + const auto &username = + is_proxy ? proxy_digest_auth_username_ : digest_auth_username_; + const auto &password = + is_proxy ? proxy_digest_auth_password_ : digest_auth_password_; + + if (!username.empty() && !password.empty()) { + std::map auth; + if (detail::parse_www_authenticate(res, auth, is_proxy)) { + Request new_req = req; + new_req.authorization_count_ += 1; + new_req.headers.erase(is_proxy ? "Proxy-Authorization" + : "Authorization"); + new_req.headers.insert(detail::make_digest_authentication_header( + req, auth, new_req.authorization_count_, detail::random_string(10), + username, password, is_proxy)); + + Response new_res; + + ret = send(new_req, new_res, error); + if (ret) { res = new_res; } + } + } + } +#endif + + return ret; +} + +inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) { + if (req.redirect_count_ == 0) { + error = Error::ExceedRedirectCount; + return false; + } + + auto location = res.get_header_value("location"); + if (location.empty()) { return false; } + + const static std::regex re( + R"((?:(https?):)?(?://(?:\[([\d:]+)\]|([^:/?#]+))(?::(\d+))?)?([^?#]*)(\?[^#]*)?(?:#.*)?)"); + + std::smatch m; + if (!std::regex_match(location, m, re)) { return false; } + + auto scheme = is_ssl() ? "https" : "http"; + + auto next_scheme = m[1].str(); + auto next_host = m[2].str(); + if (next_host.empty()) { next_host = m[3].str(); } + auto port_str = m[4].str(); + auto next_path = m[5].str(); + auto next_query = m[6].str(); + + auto next_port = port_; + if (!port_str.empty()) { + next_port = std::stoi(port_str); + } else if (!next_scheme.empty()) { + next_port = next_scheme == "https" ? 443 : 80; + } + + if (next_scheme.empty()) { next_scheme = scheme; } + if (next_host.empty()) { next_host = host_; } + if (next_path.empty()) { next_path = "/"; } + + auto path = detail::decode_url(next_path, true) + next_query; + + if (next_scheme == scheme && next_host == host_ && next_port == port_) { + return detail::redirect(*this, req, res, path, location, error); + } else { + if (next_scheme == "https") { +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + SSLClient cli(next_host.c_str(), next_port); + cli.copy_settings(*this); + if (ca_cert_store_) { cli.set_ca_cert_store(ca_cert_store_); } + return detail::redirect(cli, req, res, path, location, error); +#else + return false; +#endif + } else { + ClientImpl cli(next_host.c_str(), next_port); + cli.copy_settings(*this); + return detail::redirect(cli, req, res, path, location, error); + } + } +} + +inline bool ClientImpl::write_content_with_provider(Stream &strm, + const Request &req, + Error &error) { + auto is_shutting_down = []() { return false; }; + + if (req.is_chunked_content_provider_) { + // TODO: Brotli support + std::unique_ptr compressor; +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + if (compress_) { + compressor = detail::make_unique(); + } else +#endif + { + compressor = detail::make_unique(); + } + + return detail::write_content_chunked(strm, req.content_provider_, + is_shutting_down, *compressor, error); + } else { + return detail::write_content(strm, req.content_provider_, 0, + req.content_length_, is_shutting_down, error); + } +} + +inline bool ClientImpl::write_request(Stream &strm, Request &req, + bool close_connection, Error &error) { + // Prepare additional headers + if (close_connection) { + if (!req.has_header("Connection")) { + req.headers.emplace("Connection", "close"); + } + } + + if (!req.has_header("Host")) { + if (is_ssl()) { + if (port_ == 443) { + req.headers.emplace("Host", host_); + } else { + req.headers.emplace("Host", host_and_port_); + } + } else { + if (port_ == 80) { + req.headers.emplace("Host", host_); + } else { + req.headers.emplace("Host", host_and_port_); + } + } + } + + if (!req.has_header("Accept")) { req.headers.emplace("Accept", "*/*"); } + +#ifndef CPPHTTPLIB_NO_DEFAULT_USER_AGENT + if (!req.has_header("User-Agent")) { + auto agent = std::string("cpp-httplib/") + CPPHTTPLIB_VERSION; + req.headers.emplace("User-Agent", agent); + } +#endif + + if (req.body.empty()) { + if (req.content_provider_) { + if (!req.is_chunked_content_provider_) { + if (!req.has_header("Content-Length")) { + auto length = std::to_string(req.content_length_); + req.headers.emplace("Content-Length", length); + } + } + } else { + if (req.method == "POST" || req.method == "PUT" || + req.method == "PATCH") { + req.headers.emplace("Content-Length", "0"); + } + } + } else { + if (!req.has_header("Content-Type")) { + req.headers.emplace("Content-Type", "text/plain"); + } + + if (!req.has_header("Content-Length")) { + auto length = std::to_string(req.body.size()); + req.headers.emplace("Content-Length", length); + } + } + + if (!basic_auth_password_.empty() || !basic_auth_username_.empty()) { + if (!req.has_header("Authorization")) { + req.headers.insert(make_basic_authentication_header( + basic_auth_username_, basic_auth_password_, false)); + } + } + + if (!proxy_basic_auth_username_.empty() && + !proxy_basic_auth_password_.empty()) { + if (!req.has_header("Proxy-Authorization")) { + req.headers.insert(make_basic_authentication_header( + proxy_basic_auth_username_, proxy_basic_auth_password_, true)); + } + } + + if (!bearer_token_auth_token_.empty()) { + if (!req.has_header("Authorization")) { + req.headers.insert(make_bearer_token_authentication_header( + bearer_token_auth_token_, false)); + } + } + + if (!proxy_bearer_token_auth_token_.empty()) { + if (!req.has_header("Proxy-Authorization")) { + req.headers.insert(make_bearer_token_authentication_header( + proxy_bearer_token_auth_token_, true)); + } + } + + // Request line and headers + { + detail::BufferStream bstrm; + + const auto &path = url_encode_ ? detail::encode_url(req.path) : req.path; + bstrm.write_format("%s %s HTTP/1.1\r\n", req.method.c_str(), path.c_str()); + + detail::write_headers(bstrm, req.headers); + + // Flush buffer + auto &data = bstrm.get_buffer(); + if (!detail::write_data(strm, data.data(), data.size())) { + error = Error::Write; + return false; + } + } + + // Body + if (req.body.empty()) { + return write_content_with_provider(strm, req, error); + } + + if (!detail::write_data(strm, req.body.data(), req.body.size())) { + error = Error::Write; + return false; + } + + return true; +} + +inline std::unique_ptr ClientImpl::send_with_content_provider( + Request &req, const char *body, size_t content_length, + ContentProvider content_provider, + ContentProviderWithoutLength content_provider_without_length, + const std::string &content_type, Error &error) { + if (!content_type.empty()) { + req.headers.emplace("Content-Type", content_type); + } + +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + if (compress_) { req.headers.emplace("Content-Encoding", "gzip"); } +#endif + +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + if (compress_ && !content_provider_without_length) { + // TODO: Brotli support + detail::gzip_compressor compressor; + + if (content_provider) { + auto ok = true; + size_t offset = 0; + DataSink data_sink; + + data_sink.write = [&](const char *data, size_t data_len) -> bool { + if (ok) { + auto last = offset + data_len == content_length; + + auto ret = compressor.compress( + data, data_len, last, + [&](const char *compressed_data, size_t compressed_data_len) { + req.body.append(compressed_data, compressed_data_len); + return true; + }); + + if (ret) { + offset += data_len; + } else { + ok = false; + } + } + return ok; + }; + + while (ok && offset < content_length) { + if (!content_provider(offset, content_length - offset, data_sink)) { + error = Error::Canceled; + return nullptr; + } + } + } else { + if (!compressor.compress(body, content_length, true, + [&](const char *data, size_t data_len) { + req.body.append(data, data_len); + return true; + })) { + error = Error::Compression; + return nullptr; + } + } + } else +#endif + { + if (content_provider) { + req.content_length_ = content_length; + req.content_provider_ = std::move(content_provider); + req.is_chunked_content_provider_ = false; + } else if (content_provider_without_length) { + req.content_length_ = 0; + req.content_provider_ = detail::ContentProviderAdapter( + std::move(content_provider_without_length)); + req.is_chunked_content_provider_ = true; + req.headers.emplace("Transfer-Encoding", "chunked"); + } else { + req.body.assign(body, content_length); + ; + } + } + + auto res = detail::make_unique(); + return send(req, *res, error) ? std::move(res) : nullptr; +} + +inline Result ClientImpl::send_with_content_provider( + const std::string &method, const std::string &path, const Headers &headers, + const char *body, size_t content_length, ContentProvider content_provider, + ContentProviderWithoutLength content_provider_without_length, + const std::string &content_type) { + Request req; + req.method = method; + req.headers = headers; + req.path = path; + + auto error = Error::Success; + + auto res = send_with_content_provider( + req, body, content_length, std::move(content_provider), + std::move(content_provider_without_length), content_type, error); + + return Result{std::move(res), error, std::move(req.headers)}; +} + +inline std::string +ClientImpl::adjust_host_string(const std::string &host) const { + if (host.find(':') != std::string::npos) { return "[" + host + "]"; } + return host; +} + +inline bool ClientImpl::process_request(Stream &strm, Request &req, + Response &res, bool close_connection, + Error &error) { + // Send request + if (!write_request(strm, req, close_connection, error)) { return false; } + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + if (is_ssl()) { + auto is_proxy_enabled = !proxy_host_.empty() && proxy_port_ != -1; + if (!is_proxy_enabled) { + char buf[1]; + if (SSL_peek(socket_.ssl, buf, 1) == 0 && + SSL_get_error(socket_.ssl, 0) == SSL_ERROR_ZERO_RETURN) { + error = Error::SSLPeerCouldBeClosed_; + return false; + } + } + } +#endif + + // Receive response and headers + if (!read_response_line(strm, req, res) || + !detail::read_headers(strm, res.headers)) { + error = Error::Read; + return false; + } + + // Body + if ((res.status != 204) && req.method != "HEAD" && req.method != "CONNECT") { + auto redirect = 300 < res.status && res.status < 400 && follow_location_; + + if (req.response_handler && !redirect) { + if (!req.response_handler(res)) { + error = Error::Canceled; + return false; + } + } + + auto out = + req.content_receiver + ? static_cast( + [&](const char *buf, size_t n, uint64_t off, uint64_t len) { + if (redirect) { return true; } + auto ret = req.content_receiver(buf, n, off, len); + if (!ret) { error = Error::Canceled; } + return ret; + }) + : static_cast( + [&](const char *buf, size_t n, uint64_t /*off*/, + uint64_t /*len*/) { + if (res.body.size() + n > res.body.max_size()) { + return false; + } + res.body.append(buf, n); + return true; + }); + + auto progress = [&](uint64_t current, uint64_t total) { + if (!req.progress || redirect) { return true; } + auto ret = req.progress(current, total); + if (!ret) { error = Error::Canceled; } + return ret; + }; + + int dummy_status; + if (!detail::read_content(strm, res, (std::numeric_limits::max)(), + dummy_status, std::move(progress), std::move(out), + decompress_)) { + if (error != Error::Canceled) { error = Error::Read; } + return false; + } + } + + if (res.get_header_value("Connection") == "close" || + (res.version == "HTTP/1.0" && res.reason != "Connection established")) { + // TODO this requires a not-entirely-obvious chain of calls to be correct + // for this to be safe. Maybe a code refactor (such as moving this out to + // the send function and getting rid of the recursiveness of the mutex) + // could make this more obvious. + + // This is safe to call because process_request is only called by + // handle_request which is only called by send, which locks the request + // mutex during the process. It would be a bug to call it from a different + // thread since it's a thread-safety issue to do these things to the socket + // if another thread is using the socket. + std::lock_guard guard(socket_mutex_); + shutdown_ssl(socket_, true); + shutdown_socket(socket_); + close_socket(socket_); + } + + // Log + if (logger_) { logger_(req, res); } + + return true; +} + +inline ContentProviderWithoutLength ClientImpl::get_multipart_content_provider( + const std::string &boundary, const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items) { + size_t cur_item = 0, cur_start = 0; + // cur_item and cur_start are copied to within the std::function and maintain + // state between successive calls + return [&, cur_item, cur_start](size_t offset, + DataSink &sink) mutable -> bool { + if (!offset && items.size()) { + sink.os << detail::serialize_multipart_formdata(items, boundary, false); + return true; + } else if (cur_item < provider_items.size()) { + if (!cur_start) { + const auto &begin = detail::serialize_multipart_formdata_item_begin( + provider_items[cur_item], boundary); + offset += begin.size(); + cur_start = offset; + sink.os << begin; + } + + DataSink cur_sink; + bool has_data = true; + cur_sink.write = sink.write; + cur_sink.done = [&]() { has_data = false; }; + + if (!provider_items[cur_item].provider(offset - cur_start, cur_sink)) + return false; + + if (!has_data) { + sink.os << detail::serialize_multipart_formdata_item_end(); + cur_item++; + cur_start = 0; + } + return true; + } else { + sink.os << detail::serialize_multipart_formdata_finish(boundary); + sink.done(); + return true; + } + }; +} + +inline bool +ClientImpl::process_socket(const Socket &socket, + std::function callback) { + return detail::process_client_socket( + socket.sock, read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, + write_timeout_usec_, std::move(callback)); +} + +inline bool ClientImpl::is_ssl() const { return false; } + +inline Result ClientImpl::Get(const std::string &path) { + return Get(path, Headers(), Progress()); +} + +inline Result ClientImpl::Get(const std::string &path, Progress progress) { + return Get(path, Headers(), std::move(progress)); +} + +inline Result ClientImpl::Get(const std::string &path, const Headers &headers) { + return Get(path, headers, Progress()); +} + +inline Result ClientImpl::Get(const std::string &path, const Headers &headers, + Progress progress) { + Request req; + req.method = "GET"; + req.path = path; + req.headers = headers; + req.progress = std::move(progress); + + return send_(std::move(req)); +} + +inline Result ClientImpl::Get(const std::string &path, + ContentReceiver content_receiver) { + return Get(path, Headers(), nullptr, std::move(content_receiver), nullptr); +} + +inline Result ClientImpl::Get(const std::string &path, + ContentReceiver content_receiver, + Progress progress) { + return Get(path, Headers(), nullptr, std::move(content_receiver), + std::move(progress)); +} + +inline Result ClientImpl::Get(const std::string &path, const Headers &headers, + ContentReceiver content_receiver) { + return Get(path, headers, nullptr, std::move(content_receiver), nullptr); +} + +inline Result ClientImpl::Get(const std::string &path, const Headers &headers, + ContentReceiver content_receiver, + Progress progress) { + return Get(path, headers, nullptr, std::move(content_receiver), + std::move(progress)); +} + +inline Result ClientImpl::Get(const std::string &path, + ResponseHandler response_handler, + ContentReceiver content_receiver) { + return Get(path, Headers(), std::move(response_handler), + std::move(content_receiver), nullptr); +} + +inline Result ClientImpl::Get(const std::string &path, const Headers &headers, + ResponseHandler response_handler, + ContentReceiver content_receiver) { + return Get(path, headers, std::move(response_handler), + std::move(content_receiver), nullptr); +} + +inline Result ClientImpl::Get(const std::string &path, + ResponseHandler response_handler, + ContentReceiver content_receiver, + Progress progress) { + return Get(path, Headers(), std::move(response_handler), + std::move(content_receiver), std::move(progress)); +} + +inline Result ClientImpl::Get(const std::string &path, const Headers &headers, + ResponseHandler response_handler, + ContentReceiver content_receiver, + Progress progress) { + Request req; + req.method = "GET"; + req.path = path; + req.headers = headers; + req.response_handler = std::move(response_handler); + req.content_receiver = + [content_receiver](const char *data, size_t data_length, + uint64_t /*offset*/, uint64_t /*total_length*/) { + return content_receiver(data, data_length); + }; + req.progress = std::move(progress); + + return send_(std::move(req)); +} + +inline Result ClientImpl::Get(const std::string &path, const Params ¶ms, + const Headers &headers, Progress progress) { + if (params.empty()) { return Get(path, headers); } + + std::string path_with_query = append_query_params(path, params); + return Get(path_with_query.c_str(), headers, progress); +} + +inline Result ClientImpl::Get(const std::string &path, const Params ¶ms, + const Headers &headers, + ContentReceiver content_receiver, + Progress progress) { + return Get(path, params, headers, nullptr, content_receiver, progress); +} + +inline Result ClientImpl::Get(const std::string &path, const Params ¶ms, + const Headers &headers, + ResponseHandler response_handler, + ContentReceiver content_receiver, + Progress progress) { + if (params.empty()) { + return Get(path, headers, response_handler, content_receiver, progress); + } + + std::string path_with_query = append_query_params(path, params); + return Get(path_with_query.c_str(), headers, response_handler, + content_receiver, progress); +} + +inline Result ClientImpl::Head(const std::string &path) { + return Head(path, Headers()); +} + +inline Result ClientImpl::Head(const std::string &path, + const Headers &headers) { + Request req; + req.method = "HEAD"; + req.headers = headers; + req.path = path; + + return send_(std::move(req)); +} + +inline Result ClientImpl::Post(const std::string &path) { + return Post(path, std::string(), std::string()); +} + +inline Result ClientImpl::Post(const std::string &path, + const Headers &headers) { + return Post(path, headers, nullptr, 0, std::string()); +} + +inline Result ClientImpl::Post(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type) { + return Post(path, Headers(), body, content_length, content_type); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type) { + return send_with_content_provider("POST", path, headers, body, content_length, + nullptr, nullptr, content_type); +} + +inline Result ClientImpl::Post(const std::string &path, const std::string &body, + const std::string &content_type) { + return Post(path, Headers(), body, content_type); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type) { + return send_with_content_provider("POST", path, headers, body.data(), + body.size(), nullptr, nullptr, + content_type); +} + +inline Result ClientImpl::Post(const std::string &path, const Params ¶ms) { + return Post(path, Headers(), params); +} + +inline Result ClientImpl::Post(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return Post(path, Headers(), content_length, std::move(content_provider), + content_type); +} + +inline Result ClientImpl::Post(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return Post(path, Headers(), std::move(content_provider), content_type); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return send_with_content_provider("POST", path, headers, nullptr, + content_length, std::move(content_provider), + nullptr, content_type); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return send_with_content_provider("POST", path, headers, nullptr, 0, nullptr, + std::move(content_provider), content_type); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + const Params ¶ms) { + auto query = detail::params_to_query_str(params); + return Post(path, headers, query, "application/x-www-form-urlencoded"); +} + +inline Result ClientImpl::Post(const std::string &path, + const MultipartFormDataItems &items) { + return Post(path, Headers(), items); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items) { + const auto &boundary = detail::make_multipart_data_boundary(); + const auto &content_type = + detail::serialize_multipart_formdata_get_content_type(boundary); + const auto &body = detail::serialize_multipart_formdata(items, boundary); + return Post(path, headers, body, content_type.c_str()); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const std::string &boundary) { + if (!detail::is_multipart_boundary_chars_valid(boundary)) { + return Result{nullptr, Error::UnsupportedMultipartBoundaryChars}; + } + + const auto &content_type = + detail::serialize_multipart_formdata_get_content_type(boundary); + const auto &body = detail::serialize_multipart_formdata(items, boundary); + return Post(path, headers, body, content_type.c_str()); +} + +inline Result +ClientImpl::Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items) { + const auto &boundary = detail::make_multipart_data_boundary(); + const auto &content_type = + detail::serialize_multipart_formdata_get_content_type(boundary); + return send_with_content_provider( + "POST", path, headers, nullptr, 0, nullptr, + get_multipart_content_provider(boundary, items, provider_items), + content_type); +} + +inline Result ClientImpl::Put(const std::string &path) { + return Put(path, std::string(), std::string()); +} + +inline Result ClientImpl::Put(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type) { + return Put(path, Headers(), body, content_length, content_type); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type) { + return send_with_content_provider("PUT", path, headers, body, content_length, + nullptr, nullptr, content_type); +} + +inline Result ClientImpl::Put(const std::string &path, const std::string &body, + const std::string &content_type) { + return Put(path, Headers(), body, content_type); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type) { + return send_with_content_provider("PUT", path, headers, body.data(), + body.size(), nullptr, nullptr, + content_type); +} + +inline Result ClientImpl::Put(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return Put(path, Headers(), content_length, std::move(content_provider), + content_type); +} + +inline Result ClientImpl::Put(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return Put(path, Headers(), std::move(content_provider), content_type); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return send_with_content_provider("PUT", path, headers, nullptr, + content_length, std::move(content_provider), + nullptr, content_type); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return send_with_content_provider("PUT", path, headers, nullptr, 0, nullptr, + std::move(content_provider), content_type); +} + +inline Result ClientImpl::Put(const std::string &path, const Params ¶ms) { + return Put(path, Headers(), params); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + const Params ¶ms) { + auto query = detail::params_to_query_str(params); + return Put(path, headers, query, "application/x-www-form-urlencoded"); +} + +inline Result ClientImpl::Put(const std::string &path, + const MultipartFormDataItems &items) { + return Put(path, Headers(), items); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items) { + const auto &boundary = detail::make_multipart_data_boundary(); + const auto &content_type = + detail::serialize_multipart_formdata_get_content_type(boundary); + const auto &body = detail::serialize_multipart_formdata(items, boundary); + return Put(path, headers, body, content_type); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const std::string &boundary) { + if (!detail::is_multipart_boundary_chars_valid(boundary)) { + return Result{nullptr, Error::UnsupportedMultipartBoundaryChars}; + } + + const auto &content_type = + detail::serialize_multipart_formdata_get_content_type(boundary); + const auto &body = detail::serialize_multipart_formdata(items, boundary); + return Put(path, headers, body, content_type); +} + +inline Result +ClientImpl::Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items) { + const auto &boundary = detail::make_multipart_data_boundary(); + const auto &content_type = + detail::serialize_multipart_formdata_get_content_type(boundary); + return send_with_content_provider( + "PUT", path, headers, nullptr, 0, nullptr, + get_multipart_content_provider(boundary, items, provider_items), + content_type); +} +inline Result ClientImpl::Patch(const std::string &path) { + return Patch(path, std::string(), std::string()); +} + +inline Result ClientImpl::Patch(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type) { + return Patch(path, Headers(), body, content_length, content_type); +} + +inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type) { + return send_with_content_provider("PATCH", path, headers, body, + content_length, nullptr, nullptr, + content_type); +} + +inline Result ClientImpl::Patch(const std::string &path, + const std::string &body, + const std::string &content_type) { + return Patch(path, Headers(), body, content_type); +} + +inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type) { + return send_with_content_provider("PATCH", path, headers, body.data(), + body.size(), nullptr, nullptr, + content_type); +} + +inline Result ClientImpl::Patch(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return Patch(path, Headers(), content_length, std::move(content_provider), + content_type); +} + +inline Result ClientImpl::Patch(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return Patch(path, Headers(), std::move(content_provider), content_type); +} + +inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return send_with_content_provider("PATCH", path, headers, nullptr, + content_length, std::move(content_provider), + nullptr, content_type); +} + +inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return send_with_content_provider("PATCH", path, headers, nullptr, 0, nullptr, + std::move(content_provider), content_type); +} + +inline Result ClientImpl::Delete(const std::string &path) { + return Delete(path, Headers(), std::string(), std::string()); +} + +inline Result ClientImpl::Delete(const std::string &path, + const Headers &headers) { + return Delete(path, headers, std::string(), std::string()); +} + +inline Result ClientImpl::Delete(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type) { + return Delete(path, Headers(), body, content_length, content_type); +} + +inline Result ClientImpl::Delete(const std::string &path, + const Headers &headers, const char *body, + size_t content_length, + const std::string &content_type) { + Request req; + req.method = "DELETE"; + req.headers = headers; + req.path = path; + + if (!content_type.empty()) { + req.headers.emplace("Content-Type", content_type); + } + req.body.assign(body, content_length); + + return send_(std::move(req)); +} + +inline Result ClientImpl::Delete(const std::string &path, + const std::string &body, + const std::string &content_type) { + return Delete(path, Headers(), body.data(), body.size(), content_type); +} + +inline Result ClientImpl::Delete(const std::string &path, + const Headers &headers, + const std::string &body, + const std::string &content_type) { + return Delete(path, headers, body.data(), body.size(), content_type); +} + +inline Result ClientImpl::Options(const std::string &path) { + return Options(path, Headers()); +} + +inline Result ClientImpl::Options(const std::string &path, + const Headers &headers) { + Request req; + req.method = "OPTIONS"; + req.headers = headers; + req.path = path; + + return send_(std::move(req)); +} + +inline size_t ClientImpl::is_socket_open() const { + std::lock_guard guard(socket_mutex_); + return socket_.is_open(); +} + +inline socket_t ClientImpl::socket() const { return socket_.sock; } + +inline void ClientImpl::stop() { + std::lock_guard guard(socket_mutex_); + + // If there is anything ongoing right now, the ONLY thread-safe thing we can + // do is to shutdown_socket, so that threads using this socket suddenly + // discover they can't read/write any more and error out. Everything else + // (closing the socket, shutting ssl down) is unsafe because these actions are + // not thread-safe. + if (socket_requests_in_flight_ > 0) { + shutdown_socket(socket_); + + // Aside from that, we set a flag for the socket to be closed when we're + // done. + socket_should_be_closed_when_request_is_done_ = true; + return; + } + + // Otherwise, still holding the mutex, we can shut everything down ourselves + shutdown_ssl(socket_, true); + shutdown_socket(socket_); + close_socket(socket_); +} + +inline void ClientImpl::set_connection_timeout(time_t sec, time_t usec) { + connection_timeout_sec_ = sec; + connection_timeout_usec_ = usec; +} + +inline void ClientImpl::set_read_timeout(time_t sec, time_t usec) { + read_timeout_sec_ = sec; + read_timeout_usec_ = usec; +} + +inline void ClientImpl::set_write_timeout(time_t sec, time_t usec) { + write_timeout_sec_ = sec; + write_timeout_usec_ = usec; +} + +inline void ClientImpl::set_basic_auth(const std::string &username, + const std::string &password) { + basic_auth_username_ = username; + basic_auth_password_ = password; +} + +inline void ClientImpl::set_bearer_token_auth(const std::string &token) { + bearer_token_auth_token_ = token; +} + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void ClientImpl::set_digest_auth(const std::string &username, + const std::string &password) { + digest_auth_username_ = username; + digest_auth_password_ = password; +} +#endif + +inline void ClientImpl::set_keep_alive(bool on) { keep_alive_ = on; } + +inline void ClientImpl::set_follow_location(bool on) { follow_location_ = on; } + +inline void ClientImpl::set_url_encode(bool on) { url_encode_ = on; } + +inline void +ClientImpl::set_hostname_addr_map(std::map addr_map) { + addr_map_ = std::move(addr_map); +} + +inline void ClientImpl::set_default_headers(Headers headers) { + default_headers_ = std::move(headers); +} + +inline void ClientImpl::set_address_family(int family) { + address_family_ = family; +} + +inline void ClientImpl::set_tcp_nodelay(bool on) { tcp_nodelay_ = on; } + +inline void ClientImpl::set_socket_options(SocketOptions socket_options) { + socket_options_ = std::move(socket_options); +} + +inline void ClientImpl::set_compress(bool on) { compress_ = on; } + +inline void ClientImpl::set_decompress(bool on) { decompress_ = on; } + +inline void ClientImpl::set_interface(const std::string &intf) { + interface_ = intf; +} + +inline void ClientImpl::set_proxy(const std::string &host, int port) { + proxy_host_ = host; + proxy_port_ = port; +} + +inline void ClientImpl::set_proxy_basic_auth(const std::string &username, + const std::string &password) { + proxy_basic_auth_username_ = username; + proxy_basic_auth_password_ = password; +} + +inline void ClientImpl::set_proxy_bearer_token_auth(const std::string &token) { + proxy_bearer_token_auth_token_ = token; +} + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void ClientImpl::set_proxy_digest_auth(const std::string &username, + const std::string &password) { + proxy_digest_auth_username_ = username; + proxy_digest_auth_password_ = password; +} +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void ClientImpl::set_ca_cert_path(const std::string &ca_cert_file_path, + const std::string &ca_cert_dir_path) { + ca_cert_file_path_ = ca_cert_file_path; + ca_cert_dir_path_ = ca_cert_dir_path; +} + +inline void ClientImpl::set_ca_cert_store(X509_STORE *ca_cert_store) { + if (ca_cert_store && ca_cert_store != ca_cert_store_) { + ca_cert_store_ = ca_cert_store; + } +} +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void ClientImpl::enable_server_certificate_verification(bool enabled) { + server_certificate_verification_ = enabled; +} +#endif + +inline void ClientImpl::set_logger(Logger logger) { + logger_ = std::move(logger); +} + +/* + * SSL Implementation + */ +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +namespace detail { + +template +inline SSL *ssl_new(socket_t sock, SSL_CTX *ctx, std::mutex &ctx_mutex, + U SSL_connect_or_accept, V setup) { + SSL *ssl = nullptr; + { + std::lock_guard guard(ctx_mutex); + ssl = SSL_new(ctx); + } + + if (ssl) { + set_nonblocking(sock, true); + auto bio = BIO_new_socket(static_cast(sock), BIO_NOCLOSE); + BIO_set_nbio(bio, 1); + SSL_set_bio(ssl, bio, bio); + + if (!setup(ssl) || SSL_connect_or_accept(ssl) != 1) { + SSL_shutdown(ssl); + { + std::lock_guard guard(ctx_mutex); + SSL_free(ssl); + } + set_nonblocking(sock, false); + return nullptr; + } + BIO_set_nbio(bio, 0); + set_nonblocking(sock, false); + } + + return ssl; +} + +inline void ssl_delete(std::mutex &ctx_mutex, SSL *ssl, + bool shutdown_gracefully) { + // sometimes we may want to skip this to try to avoid SIGPIPE if we know + // the remote has closed the network connection + // Note that it is not always possible to avoid SIGPIPE, this is merely a + // best-efforts. + if (shutdown_gracefully) { SSL_shutdown(ssl); } + + std::lock_guard guard(ctx_mutex); + SSL_free(ssl); +} + +template +bool ssl_connect_or_accept_nonblocking(socket_t sock, SSL *ssl, + U ssl_connect_or_accept, + time_t timeout_sec, + time_t timeout_usec) { + int res = 0; + while ((res = ssl_connect_or_accept(ssl)) != 1) { + auto err = SSL_get_error(ssl, res); + switch (err) { + case SSL_ERROR_WANT_READ: + if (select_read(sock, timeout_sec, timeout_usec) > 0) { continue; } + break; + case SSL_ERROR_WANT_WRITE: + if (select_write(sock, timeout_sec, timeout_usec) > 0) { continue; } + break; + default: break; + } + return false; + } + return true; +} + +template +inline bool process_server_socket_ssl( + const std::atomic &svr_sock, SSL *ssl, socket_t sock, + size_t keep_alive_max_count, time_t keep_alive_timeout_sec, + time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, T callback) { + return process_server_socket_core( + svr_sock, sock, keep_alive_max_count, keep_alive_timeout_sec, + [&](bool close_connection, bool &connection_closed) { + SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec, + write_timeout_sec, write_timeout_usec); + return callback(strm, close_connection, connection_closed); + }); +} + +template +inline bool +process_client_socket_ssl(SSL *ssl, socket_t sock, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, + time_t write_timeout_usec, T callback) { + SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec, + write_timeout_sec, write_timeout_usec); + return callback(strm); +} + +class SSLInit { +public: + SSLInit() { + OPENSSL_init_ssl( + OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); + } +}; + +// SSL socket stream implementation +inline SSLSocketStream::SSLSocketStream(socket_t sock, SSL *ssl, + time_t read_timeout_sec, + time_t read_timeout_usec, + time_t write_timeout_sec, + time_t write_timeout_usec) + : sock_(sock), ssl_(ssl), read_timeout_sec_(read_timeout_sec), + read_timeout_usec_(read_timeout_usec), + write_timeout_sec_(write_timeout_sec), + write_timeout_usec_(write_timeout_usec) { + SSL_clear_mode(ssl, SSL_MODE_AUTO_RETRY); +} + +inline SSLSocketStream::~SSLSocketStream() {} + +inline bool SSLSocketStream::is_readable() const { + return detail::select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0; +} + +inline bool SSLSocketStream::is_writable() const { + return select_write(sock_, write_timeout_sec_, write_timeout_usec_) > 0 && + is_socket_alive(sock_); +} + +inline ssize_t SSLSocketStream::read(char *ptr, size_t size) { + if (SSL_pending(ssl_) > 0) { + return SSL_read(ssl_, ptr, static_cast(size)); + } else if (is_readable()) { + auto ret = SSL_read(ssl_, ptr, static_cast(size)); + if (ret < 0) { + auto err = SSL_get_error(ssl_, ret); + int n = 1000; +#ifdef _WIN32 + while (--n >= 0 && (err == SSL_ERROR_WANT_READ || + (err == SSL_ERROR_SYSCALL && + WSAGetLastError() == WSAETIMEDOUT))) { +#else + while (--n >= 0 && err == SSL_ERROR_WANT_READ) { +#endif + if (SSL_pending(ssl_) > 0) { + return SSL_read(ssl_, ptr, static_cast(size)); + } else if (is_readable()) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + ret = SSL_read(ssl_, ptr, static_cast(size)); + if (ret >= 0) { return ret; } + err = SSL_get_error(ssl_, ret); + } else { + return -1; + } + } + } + return ret; + } + return -1; +} + +inline ssize_t SSLSocketStream::write(const char *ptr, size_t size) { + if (is_writable()) { + auto handle_size = static_cast( + std::min(size, (std::numeric_limits::max)())); + + auto ret = SSL_write(ssl_, ptr, static_cast(handle_size)); + if (ret < 0) { + auto err = SSL_get_error(ssl_, ret); + int n = 1000; +#ifdef _WIN32 + while (--n >= 0 && (err == SSL_ERROR_WANT_WRITE || + (err == SSL_ERROR_SYSCALL && + WSAGetLastError() == WSAETIMEDOUT))) { +#else + while (--n >= 0 && err == SSL_ERROR_WANT_WRITE) { +#endif + if (is_writable()) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + ret = SSL_write(ssl_, ptr, static_cast(handle_size)); + if (ret >= 0) { return ret; } + err = SSL_get_error(ssl_, ret); + } else { + return -1; + } + } + } + return ret; + } + return -1; +} + +inline void SSLSocketStream::get_remote_ip_and_port(std::string &ip, + int &port) const { + detail::get_remote_ip_and_port(sock_, ip, port); +} + +inline void SSLSocketStream::get_local_ip_and_port(std::string &ip, + int &port) const { + detail::get_local_ip_and_port(sock_, ip, port); +} + +inline socket_t SSLSocketStream::socket() const { return sock_; } + +static SSLInit sslinit_; + +} // namespace detail + +// SSL HTTP server implementation +inline SSLServer::SSLServer(const char *cert_path, const char *private_key_path, + const char *client_ca_cert_file_path, + const char *client_ca_cert_dir_path, + const char *private_key_password) { + ctx_ = SSL_CTX_new(TLS_server_method()); + + if (ctx_) { + SSL_CTX_set_options(ctx_, + SSL_OP_NO_COMPRESSION | + SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); + + SSL_CTX_set_min_proto_version(ctx_, TLS1_1_VERSION); + + // add default password callback before opening encrypted private key + if (private_key_password != nullptr && (private_key_password[0] != '\0')) { + SSL_CTX_set_default_passwd_cb_userdata(ctx_, + (char *)private_key_password); + } + + if (SSL_CTX_use_certificate_chain_file(ctx_, cert_path) != 1 || + SSL_CTX_use_PrivateKey_file(ctx_, private_key_path, SSL_FILETYPE_PEM) != + 1) { + SSL_CTX_free(ctx_); + ctx_ = nullptr; + } else if (client_ca_cert_file_path || client_ca_cert_dir_path) { + SSL_CTX_load_verify_locations(ctx_, client_ca_cert_file_path, + client_ca_cert_dir_path); + + SSL_CTX_set_verify( + ctx_, SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT, nullptr); + } + } +} + +inline SSLServer::SSLServer(X509 *cert, EVP_PKEY *private_key, + X509_STORE *client_ca_cert_store) { + ctx_ = SSL_CTX_new(TLS_server_method()); + + if (ctx_) { + SSL_CTX_set_options(ctx_, + SSL_OP_NO_COMPRESSION | + SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); + + SSL_CTX_set_min_proto_version(ctx_, TLS1_1_VERSION); + + if (SSL_CTX_use_certificate(ctx_, cert) != 1 || + SSL_CTX_use_PrivateKey(ctx_, private_key) != 1) { + SSL_CTX_free(ctx_); + ctx_ = nullptr; + } else if (client_ca_cert_store) { + SSL_CTX_set_cert_store(ctx_, client_ca_cert_store); + + SSL_CTX_set_verify( + ctx_, SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT, nullptr); + } + } +} + +inline SSLServer::SSLServer( + const std::function &setup_ssl_ctx_callback) { + ctx_ = SSL_CTX_new(TLS_method()); + if (ctx_) { + if (!setup_ssl_ctx_callback(*ctx_)) { + SSL_CTX_free(ctx_); + ctx_ = nullptr; + } + } +} + +inline SSLServer::~SSLServer() { + if (ctx_) { SSL_CTX_free(ctx_); } +} + +inline bool SSLServer::is_valid() const { return ctx_; } + +inline SSL_CTX *SSLServer::ssl_context() const { return ctx_; } + +inline bool SSLServer::process_and_close_socket(socket_t sock) { + auto ssl = detail::ssl_new( + sock, ctx_, ctx_mutex_, + [&](SSL *ssl2) { + return detail::ssl_connect_or_accept_nonblocking( + sock, ssl2, SSL_accept, read_timeout_sec_, read_timeout_usec_); + }, + [](SSL * /*ssl2*/) { return true; }); + + auto ret = false; + if (ssl) { + ret = detail::process_server_socket_ssl( + svr_sock_, ssl, sock, keep_alive_max_count_, keep_alive_timeout_sec_, + read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, + write_timeout_usec_, + [this, ssl](Stream &strm, bool close_connection, + bool &connection_closed) { + return process_request(strm, close_connection, connection_closed, + [&](Request &req) { req.ssl = ssl; }); + }); + + // Shutdown gracefully if the result seemed successful, non-gracefully if + // the connection appeared to be closed. + const bool shutdown_gracefully = ret; + detail::ssl_delete(ctx_mutex_, ssl, shutdown_gracefully); + } + + detail::shutdown_socket(sock); + detail::close_socket(sock); + return ret; +} + +// SSL HTTP client implementation +inline SSLClient::SSLClient(const std::string &host) + : SSLClient(host, 443, std::string(), std::string()) {} + +inline SSLClient::SSLClient(const std::string &host, int port) + : SSLClient(host, port, std::string(), std::string()) {} + +inline SSLClient::SSLClient(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path) + : ClientImpl(host, port, client_cert_path, client_key_path) { + ctx_ = SSL_CTX_new(TLS_client_method()); + + detail::split(&host_[0], &host_[host_.size()], '.', + [&](const char *b, const char *e) { + host_components_.emplace_back(std::string(b, e)); + }); + + if (!client_cert_path.empty() && !client_key_path.empty()) { + if (SSL_CTX_use_certificate_file(ctx_, client_cert_path.c_str(), + SSL_FILETYPE_PEM) != 1 || + SSL_CTX_use_PrivateKey_file(ctx_, client_key_path.c_str(), + SSL_FILETYPE_PEM) != 1) { + SSL_CTX_free(ctx_); + ctx_ = nullptr; + } + } +} + +inline SSLClient::SSLClient(const std::string &host, int port, + X509 *client_cert, EVP_PKEY *client_key) + : ClientImpl(host, port) { + ctx_ = SSL_CTX_new(TLS_client_method()); + + detail::split(&host_[0], &host_[host_.size()], '.', + [&](const char *b, const char *e) { + host_components_.emplace_back(std::string(b, e)); + }); + + if (client_cert != nullptr && client_key != nullptr) { + if (SSL_CTX_use_certificate(ctx_, client_cert) != 1 || + SSL_CTX_use_PrivateKey(ctx_, client_key) != 1) { + SSL_CTX_free(ctx_); + ctx_ = nullptr; + } + } +} + +inline SSLClient::~SSLClient() { + if (ctx_) { SSL_CTX_free(ctx_); } + // Make sure to shut down SSL since shutdown_ssl will resolve to the + // base function rather than the derived function once we get to the + // base class destructor, and won't free the SSL (causing a leak). + shutdown_ssl_impl(socket_, true); +} + +inline bool SSLClient::is_valid() const { return ctx_; } + +inline void SSLClient::set_ca_cert_store(X509_STORE *ca_cert_store) { + if (ca_cert_store) { + if (ctx_) { + if (SSL_CTX_get_cert_store(ctx_) != ca_cert_store) { + // Free memory allocated for old cert and use new store `ca_cert_store` + SSL_CTX_set_cert_store(ctx_, ca_cert_store); + } + } else { + X509_STORE_free(ca_cert_store); + } + } +} + +inline long SSLClient::get_openssl_verify_result() const { + return verify_result_; +} + +inline SSL_CTX *SSLClient::ssl_context() const { return ctx_; } + +inline bool SSLClient::create_and_connect_socket(Socket &socket, Error &error) { + return is_valid() && ClientImpl::create_and_connect_socket(socket, error); +} + +// Assumes that socket_mutex_ is locked and that there are no requests in flight +inline bool SSLClient::connect_with_proxy(Socket &socket, Response &res, + bool &success, Error &error) { + success = true; + Response res2; + if (!detail::process_client_socket( + socket.sock, read_timeout_sec_, read_timeout_usec_, + write_timeout_sec_, write_timeout_usec_, [&](Stream &strm) { + Request req2; + req2.method = "CONNECT"; + req2.path = host_and_port_; + return process_request(strm, req2, res2, false, error); + })) { + // Thread-safe to close everything because we are assuming there are no + // requests in flight + shutdown_ssl(socket, true); + shutdown_socket(socket); + close_socket(socket); + success = false; + return false; + } + + if (res2.status == 407) { + if (!proxy_digest_auth_username_.empty() && + !proxy_digest_auth_password_.empty()) { + std::map auth; + if (detail::parse_www_authenticate(res2, auth, true)) { + Response res3; + if (!detail::process_client_socket( + socket.sock, read_timeout_sec_, read_timeout_usec_, + write_timeout_sec_, write_timeout_usec_, [&](Stream &strm) { + Request req3; + req3.method = "CONNECT"; + req3.path = host_and_port_; + req3.headers.insert(detail::make_digest_authentication_header( + req3, auth, 1, detail::random_string(10), + proxy_digest_auth_username_, proxy_digest_auth_password_, + true)); + return process_request(strm, req3, res3, false, error); + })) { + // Thread-safe to close everything because we are assuming there are + // no requests in flight + shutdown_ssl(socket, true); + shutdown_socket(socket); + close_socket(socket); + success = false; + return false; + } + } + } else { + res = res2; + return false; + } + } + + return true; +} + +inline bool SSLClient::load_certs() { + bool ret = true; + + std::call_once(initialize_cert_, [&]() { + std::lock_guard guard(ctx_mutex_); + if (!ca_cert_file_path_.empty()) { + if (!SSL_CTX_load_verify_locations(ctx_, ca_cert_file_path_.c_str(), + nullptr)) { + ret = false; + } + } else if (!ca_cert_dir_path_.empty()) { + if (!SSL_CTX_load_verify_locations(ctx_, nullptr, + ca_cert_dir_path_.c_str())) { + ret = false; + } + } else { + auto loaded = false; +#ifdef _WIN32 + loaded = + detail::load_system_certs_on_windows(SSL_CTX_get_cert_store(ctx_)); +#elif defined(CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN) && defined(__APPLE__) +#if TARGET_OS_OSX + loaded = detail::load_system_certs_on_macos(SSL_CTX_get_cert_store(ctx_)); +#endif // TARGET_OS_OSX +#endif // _WIN32 + if (!loaded) { SSL_CTX_set_default_verify_paths(ctx_); } + } + }); + + return ret; +} + +inline bool SSLClient::initialize_ssl(Socket &socket, Error &error) { + auto ssl = detail::ssl_new( + socket.sock, ctx_, ctx_mutex_, + [&](SSL *ssl2) { + if (server_certificate_verification_) { + if (!load_certs()) { + error = Error::SSLLoadingCerts; + return false; + } + SSL_set_verify(ssl2, SSL_VERIFY_NONE, nullptr); + } + + if (!detail::ssl_connect_or_accept_nonblocking( + socket.sock, ssl2, SSL_connect, connection_timeout_sec_, + connection_timeout_usec_)) { + error = Error::SSLConnection; + return false; + } + + if (server_certificate_verification_) { + verify_result_ = SSL_get_verify_result(ssl2); + + if (verify_result_ != X509_V_OK) { + error = Error::SSLServerVerification; + return false; + } + + auto server_cert = SSL_get1_peer_certificate(ssl2); + + if (server_cert == nullptr) { + error = Error::SSLServerVerification; + return false; + } + + if (!verify_host(server_cert)) { + X509_free(server_cert); + error = Error::SSLServerVerification; + return false; + } + X509_free(server_cert); + } + + return true; + }, + [&](SSL *ssl2) { + SSL_set_tlsext_host_name(ssl2, host_.c_str()); + return true; + }); + + if (ssl) { + socket.ssl = ssl; + return true; + } + + shutdown_socket(socket); + close_socket(socket); + return false; +} + +inline void SSLClient::shutdown_ssl(Socket &socket, bool shutdown_gracefully) { + shutdown_ssl_impl(socket, shutdown_gracefully); +} + +inline void SSLClient::shutdown_ssl_impl(Socket &socket, + bool shutdown_gracefully) { + if (socket.sock == INVALID_SOCKET) { + assert(socket.ssl == nullptr); + return; + } + if (socket.ssl) { + detail::ssl_delete(ctx_mutex_, socket.ssl, shutdown_gracefully); + socket.ssl = nullptr; + } + assert(socket.ssl == nullptr); +} + +inline bool +SSLClient::process_socket(const Socket &socket, + std::function callback) { + assert(socket.ssl); + return detail::process_client_socket_ssl( + socket.ssl, socket.sock, read_timeout_sec_, read_timeout_usec_, + write_timeout_sec_, write_timeout_usec_, std::move(callback)); +} + +inline bool SSLClient::is_ssl() const { return true; } + +inline bool SSLClient::verify_host(X509 *server_cert) const { + /* Quote from RFC2818 section 3.1 "Server Identity" + + If a subjectAltName extension of type dNSName is present, that MUST + be used as the identity. Otherwise, the (most specific) Common Name + field in the Subject field of the certificate MUST be used. Although + the use of the Common Name is existing practice, it is deprecated and + Certification Authorities are encouraged to use the dNSName instead. + + Matching is performed using the matching rules specified by + [RFC2459]. If more than one identity of a given type is present in + the certificate (e.g., more than one dNSName name, a match in any one + of the set is considered acceptable.) Names may contain the wildcard + character * which is considered to match any single domain name + component or component fragment. E.g., *.a.com matches foo.a.com but + not bar.foo.a.com. f*.com matches foo.com but not bar.com. + + In some cases, the URI is specified as an IP address rather than a + hostname. In this case, the iPAddress subjectAltName must be present + in the certificate and must exactly match the IP in the URI. + + */ + return verify_host_with_subject_alt_name(server_cert) || + verify_host_with_common_name(server_cert); +} + +inline bool +SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const { + auto ret = false; + + auto type = GEN_DNS; + + struct in6_addr addr6; + struct in_addr addr; + size_t addr_len = 0; + +#ifndef __MINGW32__ + if (inet_pton(AF_INET6, host_.c_str(), &addr6)) { + type = GEN_IPADD; + addr_len = sizeof(struct in6_addr); + } else if (inet_pton(AF_INET, host_.c_str(), &addr)) { + type = GEN_IPADD; + addr_len = sizeof(struct in_addr); + } +#endif + + auto alt_names = static_cast( + X509_get_ext_d2i(server_cert, NID_subject_alt_name, nullptr, nullptr)); + + if (alt_names) { + auto dsn_matched = false; + auto ip_matched = false; + + auto count = sk_GENERAL_NAME_num(alt_names); + + for (decltype(count) i = 0; i < count && !dsn_matched; i++) { + auto val = sk_GENERAL_NAME_value(alt_names, i); + if (val->type == type) { + auto name = (const char *)ASN1_STRING_get0_data(val->d.ia5); + auto name_len = (size_t)ASN1_STRING_length(val->d.ia5); + + switch (type) { + case GEN_DNS: dsn_matched = check_host_name(name, name_len); break; + + case GEN_IPADD: + if (!memcmp(&addr6, name, addr_len) || + !memcmp(&addr, name, addr_len)) { + ip_matched = true; + } + break; + } + } + } + + if (dsn_matched || ip_matched) { ret = true; } + } + + GENERAL_NAMES_free((STACK_OF(GENERAL_NAME) *)alt_names); + return ret; +} + +inline bool SSLClient::verify_host_with_common_name(X509 *server_cert) const { + const auto subject_name = X509_get_subject_name(server_cert); + + if (subject_name != nullptr) { + char name[BUFSIZ]; + auto name_len = X509_NAME_get_text_by_NID(subject_name, NID_commonName, + name, sizeof(name)); + + if (name_len != -1) { + return check_host_name(name, static_cast(name_len)); + } + } + + return false; +} + +inline bool SSLClient::check_host_name(const char *pattern, + size_t pattern_len) const { + if (host_.size() == pattern_len && host_ == pattern) { return true; } + + // Wildcard match + // https://bugs.launchpad.net/ubuntu/+source/firefox-3.0/+bug/376484 + std::vector pattern_components; + detail::split(&pattern[0], &pattern[pattern_len], '.', + [&](const char *b, const char *e) { + pattern_components.emplace_back(std::string(b, e)); + }); + + if (host_components_.size() != pattern_components.size()) { return false; } + + auto itr = pattern_components.begin(); + for (const auto &h : host_components_) { + auto &p = *itr; + if (p != h && p != "*") { + auto partial_match = (p.size() > 0 && p[p.size() - 1] == '*' && + !p.compare(0, p.size() - 1, h)); + if (!partial_match) { return false; } + } + ++itr; + } + + return true; +} +#endif + +// Universal client implementation +inline Client::Client(const std::string &scheme_host_port) + : Client(scheme_host_port, std::string(), std::string()) {} + +inline Client::Client(const std::string &scheme_host_port, + const std::string &client_cert_path, + const std::string &client_key_path) { + const static std::regex re( + R"((?:([a-z]+):\/\/)?(?:\[([\d:]+)\]|([^:/?#]+))(?::(\d+))?)"); + + std::smatch m; + if (std::regex_match(scheme_host_port, m, re)) { + auto scheme = m[1].str(); + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + if (!scheme.empty() && (scheme != "http" && scheme != "https")) { +#else + if (!scheme.empty() && scheme != "http") { +#endif +#ifndef CPPHTTPLIB_NO_EXCEPTIONS + std::string msg = "'" + scheme + "' scheme is not supported."; + throw std::invalid_argument(msg); +#endif + return; + } + + auto is_ssl = scheme == "https"; + + auto host = m[2].str(); + if (host.empty()) { host = m[3].str(); } + + auto port_str = m[4].str(); + auto port = !port_str.empty() ? std::stoi(port_str) : (is_ssl ? 443 : 80); + + if (is_ssl) { +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + cli_ = detail::make_unique(host, port, client_cert_path, + client_key_path); + is_ssl_ = is_ssl; +#endif + } else { + cli_ = detail::make_unique(host, port, client_cert_path, + client_key_path); + } + } else { + cli_ = detail::make_unique(scheme_host_port, 80, + client_cert_path, client_key_path); + } +} + +inline Client::Client(const std::string &host, int port) + : cli_(detail::make_unique(host, port)) {} + +inline Client::Client(const std::string &host, int port, + const std::string &client_cert_path, + const std::string &client_key_path) + : cli_(detail::make_unique(host, port, client_cert_path, + client_key_path)) {} + +inline Client::~Client() {} + +inline bool Client::is_valid() const { + return cli_ != nullptr && cli_->is_valid(); +} + +inline Result Client::Get(const std::string &path) { return cli_->Get(path); } +inline Result Client::Get(const std::string &path, const Headers &headers) { + return cli_->Get(path, headers); +} +inline Result Client::Get(const std::string &path, Progress progress) { + return cli_->Get(path, std::move(progress)); +} +inline Result Client::Get(const std::string &path, const Headers &headers, + Progress progress) { + return cli_->Get(path, headers, std::move(progress)); +} +inline Result Client::Get(const std::string &path, + ContentReceiver content_receiver) { + return cli_->Get(path, std::move(content_receiver)); +} +inline Result Client::Get(const std::string &path, const Headers &headers, + ContentReceiver content_receiver) { + return cli_->Get(path, headers, std::move(content_receiver)); +} +inline Result Client::Get(const std::string &path, + ContentReceiver content_receiver, Progress progress) { + return cli_->Get(path, std::move(content_receiver), std::move(progress)); +} +inline Result Client::Get(const std::string &path, const Headers &headers, + ContentReceiver content_receiver, Progress progress) { + return cli_->Get(path, headers, std::move(content_receiver), + std::move(progress)); +} +inline Result Client::Get(const std::string &path, + ResponseHandler response_handler, + ContentReceiver content_receiver) { + return cli_->Get(path, std::move(response_handler), + std::move(content_receiver)); +} +inline Result Client::Get(const std::string &path, const Headers &headers, + ResponseHandler response_handler, + ContentReceiver content_receiver) { + return cli_->Get(path, headers, std::move(response_handler), + std::move(content_receiver)); +} +inline Result Client::Get(const std::string &path, + ResponseHandler response_handler, + ContentReceiver content_receiver, Progress progress) { + return cli_->Get(path, std::move(response_handler), + std::move(content_receiver), std::move(progress)); +} +inline Result Client::Get(const std::string &path, const Headers &headers, + ResponseHandler response_handler, + ContentReceiver content_receiver, Progress progress) { + return cli_->Get(path, headers, std::move(response_handler), + std::move(content_receiver), std::move(progress)); +} +inline Result Client::Get(const std::string &path, const Params ¶ms, + const Headers &headers, Progress progress) { + return cli_->Get(path, params, headers, progress); +} +inline Result Client::Get(const std::string &path, const Params ¶ms, + const Headers &headers, + ContentReceiver content_receiver, Progress progress) { + return cli_->Get(path, params, headers, content_receiver, progress); +} +inline Result Client::Get(const std::string &path, const Params ¶ms, + const Headers &headers, + ResponseHandler response_handler, + ContentReceiver content_receiver, Progress progress) { + return cli_->Get(path, params, headers, response_handler, content_receiver, + progress); +} + +inline Result Client::Head(const std::string &path) { return cli_->Head(path); } +inline Result Client::Head(const std::string &path, const Headers &headers) { + return cli_->Head(path, headers); +} + +inline Result Client::Post(const std::string &path) { return cli_->Post(path); } +inline Result Client::Post(const std::string &path, const Headers &headers) { + return cli_->Post(path, headers); +} +inline Result Client::Post(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type) { + return cli_->Post(path, body, content_length, content_type); +} +inline Result Client::Post(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type) { + return cli_->Post(path, headers, body, content_length, content_type); +} +inline Result Client::Post(const std::string &path, const std::string &body, + const std::string &content_type) { + return cli_->Post(path, body, content_type); +} +inline Result Client::Post(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type) { + return cli_->Post(path, headers, body, content_type); +} +inline Result Client::Post(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return cli_->Post(path, content_length, std::move(content_provider), + content_type); +} +inline Result Client::Post(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return cli_->Post(path, std::move(content_provider), content_type); +} +inline Result Client::Post(const std::string &path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return cli_->Post(path, headers, content_length, std::move(content_provider), + content_type); +} +inline Result Client::Post(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return cli_->Post(path, headers, std::move(content_provider), content_type); +} +inline Result Client::Post(const std::string &path, const Params ¶ms) { + return cli_->Post(path, params); +} +inline Result Client::Post(const std::string &path, const Headers &headers, + const Params ¶ms) { + return cli_->Post(path, headers, params); +} +inline Result Client::Post(const std::string &path, + const MultipartFormDataItems &items) { + return cli_->Post(path, items); +} +inline Result Client::Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items) { + return cli_->Post(path, headers, items); +} +inline Result Client::Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const std::string &boundary) { + return cli_->Post(path, headers, items, boundary); +} +inline Result +Client::Post(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items) { + return cli_->Post(path, headers, items, provider_items); +} +inline Result Client::Put(const std::string &path) { return cli_->Put(path); } +inline Result Client::Put(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type) { + return cli_->Put(path, body, content_length, content_type); +} +inline Result Client::Put(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type) { + return cli_->Put(path, headers, body, content_length, content_type); +} +inline Result Client::Put(const std::string &path, const std::string &body, + const std::string &content_type) { + return cli_->Put(path, body, content_type); +} +inline Result Client::Put(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type) { + return cli_->Put(path, headers, body, content_type); +} +inline Result Client::Put(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return cli_->Put(path, content_length, std::move(content_provider), + content_type); +} +inline Result Client::Put(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return cli_->Put(path, std::move(content_provider), content_type); +} +inline Result Client::Put(const std::string &path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return cli_->Put(path, headers, content_length, std::move(content_provider), + content_type); +} +inline Result Client::Put(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return cli_->Put(path, headers, std::move(content_provider), content_type); +} +inline Result Client::Put(const std::string &path, const Params ¶ms) { + return cli_->Put(path, params); +} +inline Result Client::Put(const std::string &path, const Headers &headers, + const Params ¶ms) { + return cli_->Put(path, headers, params); +} +inline Result Client::Put(const std::string &path, + const MultipartFormDataItems &items) { + return cli_->Put(path, items); +} +inline Result Client::Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items) { + return cli_->Put(path, headers, items); +} +inline Result Client::Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const std::string &boundary) { + return cli_->Put(path, headers, items, boundary); +} +inline Result +Client::Put(const std::string &path, const Headers &headers, + const MultipartFormDataItems &items, + const MultipartFormDataProviderItems &provider_items) { + return cli_->Put(path, headers, items, provider_items); +} +inline Result Client::Patch(const std::string &path) { + return cli_->Patch(path); +} +inline Result Client::Patch(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type) { + return cli_->Patch(path, body, content_length, content_type); +} +inline Result Client::Patch(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type) { + return cli_->Patch(path, headers, body, content_length, content_type); +} +inline Result Client::Patch(const std::string &path, const std::string &body, + const std::string &content_type) { + return cli_->Patch(path, body, content_type); +} +inline Result Client::Patch(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type) { + return cli_->Patch(path, headers, body, content_type); +} +inline Result Client::Patch(const std::string &path, size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return cli_->Patch(path, content_length, std::move(content_provider), + content_type); +} +inline Result Client::Patch(const std::string &path, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return cli_->Patch(path, std::move(content_provider), content_type); +} +inline Result Client::Patch(const std::string &path, const Headers &headers, + size_t content_length, + ContentProvider content_provider, + const std::string &content_type) { + return cli_->Patch(path, headers, content_length, std::move(content_provider), + content_type); +} +inline Result Client::Patch(const std::string &path, const Headers &headers, + ContentProviderWithoutLength content_provider, + const std::string &content_type) { + return cli_->Patch(path, headers, std::move(content_provider), content_type); +} +inline Result Client::Delete(const std::string &path) { + return cli_->Delete(path); +} +inline Result Client::Delete(const std::string &path, const Headers &headers) { + return cli_->Delete(path, headers); +} +inline Result Client::Delete(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type) { + return cli_->Delete(path, body, content_length, content_type); +} +inline Result Client::Delete(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type) { + return cli_->Delete(path, headers, body, content_length, content_type); +} +inline Result Client::Delete(const std::string &path, const std::string &body, + const std::string &content_type) { + return cli_->Delete(path, body, content_type); +} +inline Result Client::Delete(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type) { + return cli_->Delete(path, headers, body, content_type); +} +inline Result Client::Options(const std::string &path) { + return cli_->Options(path); +} +inline Result Client::Options(const std::string &path, const Headers &headers) { + return cli_->Options(path, headers); +} + +inline bool Client::send(Request &req, Response &res, Error &error) { + return cli_->send(req, res, error); +} + +inline Result Client::send(const Request &req) { return cli_->send(req); } + +inline size_t Client::is_socket_open() const { return cli_->is_socket_open(); } + +inline socket_t Client::socket() const { return cli_->socket(); } + +inline void Client::stop() { cli_->stop(); } + +inline void +Client::set_hostname_addr_map(std::map addr_map) { + cli_->set_hostname_addr_map(std::move(addr_map)); +} + +inline void Client::set_default_headers(Headers headers) { + cli_->set_default_headers(std::move(headers)); +} + +inline void Client::set_address_family(int family) { + cli_->set_address_family(family); +} + +inline void Client::set_tcp_nodelay(bool on) { cli_->set_tcp_nodelay(on); } + +inline void Client::set_socket_options(SocketOptions socket_options) { + cli_->set_socket_options(std::move(socket_options)); +} + +inline void Client::set_connection_timeout(time_t sec, time_t usec) { + cli_->set_connection_timeout(sec, usec); +} + +inline void Client::set_read_timeout(time_t sec, time_t usec) { + cli_->set_read_timeout(sec, usec); +} + +inline void Client::set_write_timeout(time_t sec, time_t usec) { + cli_->set_write_timeout(sec, usec); +} + +inline void Client::set_basic_auth(const std::string &username, + const std::string &password) { + cli_->set_basic_auth(username, password); +} +inline void Client::set_bearer_token_auth(const std::string &token) { + cli_->set_bearer_token_auth(token); +} +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void Client::set_digest_auth(const std::string &username, + const std::string &password) { + cli_->set_digest_auth(username, password); +} +#endif + +inline void Client::set_keep_alive(bool on) { cli_->set_keep_alive(on); } +inline void Client::set_follow_location(bool on) { + cli_->set_follow_location(on); +} + +inline void Client::set_url_encode(bool on) { cli_->set_url_encode(on); } + +inline void Client::set_compress(bool on) { cli_->set_compress(on); } + +inline void Client::set_decompress(bool on) { cli_->set_decompress(on); } + +inline void Client::set_interface(const std::string &intf) { + cli_->set_interface(intf); +} + +inline void Client::set_proxy(const std::string &host, int port) { + cli_->set_proxy(host, port); +} +inline void Client::set_proxy_basic_auth(const std::string &username, + const std::string &password) { + cli_->set_proxy_basic_auth(username, password); +} +inline void Client::set_proxy_bearer_token_auth(const std::string &token) { + cli_->set_proxy_bearer_token_auth(token); +} +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void Client::set_proxy_digest_auth(const std::string &username, + const std::string &password) { + cli_->set_proxy_digest_auth(username, password); +} +#endif + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void Client::enable_server_certificate_verification(bool enabled) { + cli_->enable_server_certificate_verification(enabled); +} +#endif + +inline void Client::set_logger(Logger logger) { cli_->set_logger(logger); } + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline void Client::set_ca_cert_path(const std::string &ca_cert_file_path, + const std::string &ca_cert_dir_path) { + cli_->set_ca_cert_path(ca_cert_file_path, ca_cert_dir_path); +} + +inline void Client::set_ca_cert_store(X509_STORE *ca_cert_store) { + if (is_ssl_) { + static_cast(*cli_).set_ca_cert_store(ca_cert_store); + } else { + cli_->set_ca_cert_store(ca_cert_store); + } +} + +inline long Client::get_openssl_verify_result() const { + if (is_ssl_) { + return static_cast(*cli_).get_openssl_verify_result(); + } + return -1; // NOTE: -1 doesn't match any of X509_V_ERR_??? +} + +inline SSL_CTX *Client::ssl_context() const { + if (is_ssl_) { return static_cast(*cli_).ssl_context(); } + return nullptr; +} +#endif + +// ---------------------------------------------------------------------------- + +} // namespace httplib + +#if defined(_WIN32) && defined(CPPHTTPLIB_USE_POLL) +#undef poll +#endif + +#endif // CPPHTTPLIB_HTTPLIB_H diff --git a/ctransformers/models/submodules/ggllm.cpp/examples/server/json.hpp b/ctransformers/models/submodules/ggllm.cpp/examples/server/json.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4d1a37ad7cb874769d911fc3362d567da3aca291 --- /dev/null +++ b/ctransformers/models/submodules/ggllm.cpp/examples/server/json.hpp @@ -0,0 +1,24596 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + +/****************************************************************************\ + * Note on documentation: The source files contain links to the online * + * documentation of the public API at https://json.nlohmann.me. This URL * + * contains the most recent documentation and should also be applicable to * + * previous versions; documentation for deprecated functions is not * + * removed, but marked deprecated. See "Generate documentation" section in * + * file docs/README.md. * +\****************************************************************************/ + +#ifndef INCLUDE_NLOHMANN_JSON_HPP_ +#define INCLUDE_NLOHMANN_JSON_HPP_ + +#include // all_of, find, for_each +#include // nullptr_t, ptrdiff_t, size_t +#include // hash, less +#include // initializer_list +#ifndef JSON_NO_IO + #include // istream, ostream +#endif // JSON_NO_IO +#include // random_access_iterator_tag +#include // unique_ptr +#include // accumulate +#include // string, stoi, to_string +#include // declval, forward, move, pair, swap +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// This file contains all macro definitions affecting or depending on the ABI + +#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK + #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) + #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 2 + #warning "Already included a different version of the library!" + #endif + #endif +#endif + +#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_PATCH 2 // NOLINT(modernize-macro-to-enum) + +#ifndef JSON_DIAGNOSTICS + #define JSON_DIAGNOSTICS 0 +#endif + +#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 +#endif + +#if JSON_DIAGNOSTICS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS +#endif + +#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp +#else + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION + #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0 +#endif + +// Construct the namespace ABI tags component +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) + +#define NLOHMANN_JSON_ABI_TAGS \ + NLOHMANN_JSON_ABI_TAGS_CONCAT( \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + +// Construct the namespace version component +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ + _v ## major ## _ ## minor ## _ ## patch +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) + +#if NLOHMANN_JSON_NAMESPACE_NO_VERSION +#define NLOHMANN_JSON_NAMESPACE_VERSION +#else +#define NLOHMANN_JSON_NAMESPACE_VERSION \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \ + NLOHMANN_JSON_VERSION_MINOR, \ + NLOHMANN_JSON_VERSION_PATCH) +#endif + +// Combine namespace components +#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b +#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \ + NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) + +#ifndef NLOHMANN_JSON_NAMESPACE +#define NLOHMANN_JSON_NAMESPACE \ + nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN +#define NLOHMANN_JSON_NAMESPACE_BEGIN \ + namespace nlohmann \ + { \ + inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) \ + { +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_END +#define NLOHMANN_JSON_NAMESPACE_END \ + } /* namespace (inline namespace) NOLINT(readability/namespace) */ \ + } // namespace nlohmann +#endif + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // transform +#include // array +#include // forward_list +#include // inserter, front_inserter, end +#include // map +#include // string +#include // tuple, make_tuple +#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible +#include // unordered_map +#include // pair, declval +#include // valarray + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // nullptr_t +#include // exception +#include // runtime_error +#include // to_string +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // uint8_t +#include // string + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // declval, pair +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template struct make_void +{ + using type = void; +}; +template using void_t = typename make_void::type; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +// https://en.cppreference.com/w/cpp/experimental/is_detected +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + nonesuch(nonesuch const&&) = delete; + void operator=(nonesuch const&) = delete; + void operator=(nonesuch&&) = delete; +}; + +template class Op, + class... Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template class Op, class... Args> +struct detector>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op; +}; + +template class Op, class... Args> +using is_detected = typename detector::value_t; + +template class Op, class... Args> +struct is_detected_lazy : is_detected { }; + +template class Op, class... Args> +using detected_t = typename detector::type; + +template class Op, class... Args> +using detected_or = detector; + +template class Op, class... Args> +using detected_or_t = typename detected_or::type; + +template class Op, class... Args> +using is_detected_exact = std::is_same>; + +template class Op, class... Args> +using is_detected_convertible = + std::is_convertible, To>; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + + +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson +// SPDX-License-Identifier: MIT + +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + */ + +#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) +#if defined(JSON_HEDLEY_VERSION) + #undef JSON_HEDLEY_VERSION +#endif +#define JSON_HEDLEY_VERSION 15 + +#if defined(JSON_HEDLEY_STRINGIFY_EX) + #undef JSON_HEDLEY_STRINGIFY_EX +#endif +#define JSON_HEDLEY_STRINGIFY_EX(x) #x + +#if defined(JSON_HEDLEY_STRINGIFY) + #undef JSON_HEDLEY_STRINGIFY +#endif +#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) + +#if defined(JSON_HEDLEY_CONCAT_EX) + #undef JSON_HEDLEY_CONCAT_EX +#endif +#define JSON_HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(JSON_HEDLEY_CONCAT) + #undef JSON_HEDLEY_CONCAT +#endif +#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) + +#if defined(JSON_HEDLEY_CONCAT3_EX) + #undef JSON_HEDLEY_CONCAT3_EX +#endif +#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(JSON_HEDLEY_CONCAT3) + #undef JSON_HEDLEY_CONCAT3 +#endif +#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(JSON_HEDLEY_VERSION_ENCODE) + #undef JSON_HEDLEY_VERSION_ENCODE +#endif +#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) + #undef JSON_HEDLEY_VERSION_DECODE_MAJOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) + #undef JSON_HEDLEY_VERSION_DECODE_MINOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) + #undef JSON_HEDLEY_VERSION_DECODE_REVISION +#endif +#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(JSON_HEDLEY_GNUC_VERSION) + #undef JSON_HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) + #undef JSON_HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GNUC_VERSION) + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION) + #undef JSON_HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) + #undef JSON_HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(JSON_HEDLEY_MSVC_VERSION) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION) + #undef JSON_HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_VERSION) + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #undef JSON_HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) + #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION) + #undef JSON_HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) + #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) + #undef JSON_HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PGI_VERSION) + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #undef JSON_HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) + #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION) + #undef JSON_HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) + #undef JSON_HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_ARM_VERSION) + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION) + #undef JSON_HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) + #undef JSON_HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IBM_VERSION) + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_VERSION) + #undef JSON_HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +#if (__TI_COMPILER_VERSION__ >= 16000000) + #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif +#endif + +#if defined(JSON_HEDLEY_TI_VERSION_CHECK) + #undef JSON_HEDLEY_TI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_VERSION) + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #undef JSON_HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) + #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #undef JSON_HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) + #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #undef JSON_HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) + #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) + #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #undef JSON_HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) + #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #undef JSON_HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) + #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #undef JSON_HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) + #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION) + #undef JSON_HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) + #if defined(_RELEASE_PATCHLEVEL) + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) + #else + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) + #undef JSON_HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_CRAY_VERSION) + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION) + #undef JSON_HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) + #if __VER__ > 1000 + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) + #else + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) + #undef JSON_HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IAR_VERSION) + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION) + #undef JSON_HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) + #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION_CHECK) + #undef JSON_HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION) + #undef JSON_HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) + #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION_CHECK) + #undef JSON_HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_DMC_VERSION) + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #undef JSON_HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) + #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) + #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION) + #undef JSON_HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) + #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) + #undef JSON_HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PELLES_VERSION) + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #undef JSON_HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) + #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) + #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION) + #undef JSON_HEDLEY_GCC_VERSION +#endif +#if \ + defined(JSON_HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(JSON_HEDLEY_INTEL_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_ARM_VERSION) && \ + !defined(JSON_HEDLEY_CRAY_VERSION) && \ + !defined(JSON_HEDLEY_TI_VERSION) && \ + !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION JSON_HEDLEY_GNUC_VERSION +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_BUILTIN) + #undef JSON_HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else + #define JSON_HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) + #undef JSON_HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) + #undef JSON_HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_FEATURE) + #undef JSON_HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else + #define JSON_HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) + #undef JSON_HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) + #undef JSON_HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_EXTENSION) + #undef JSON_HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else + #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) + #undef JSON_HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) + #undef JSON_HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_WARNING) + #undef JSON_HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else + #define JSON_HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) + #undef JSON_HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_WARNING) + #undef JSON_HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) + #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_PRAGMA(value) __pragma(value) +#else + #define JSON_HEDLEY_PRAGMA(value) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) + #undef JSON_HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) + #undef JSON_HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) + #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_PUSH + #define JSON_HEDLEY_DIAGNOSTIC_POP +#endif + +/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") +# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(JSON_HEDLEY_CONST_CAST) + #undef JSON_HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_REINTERPRET_CAST) + #undef JSON_HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_STATIC_CAST) + #undef JSON_HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else + #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_CPP_CAST) + #undef JSON_HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(JSON_HEDLEY_DEPRECATED) + #undef JSON_HEDLEY_DEPRECATED +#endif +#if defined(JSON_HEDLEY_DEPRECATED_FOR) + #undef JSON_HEDLEY_DEPRECATED_FOR +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) + #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else + #define JSON_HEDLEY_DEPRECATED(since) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(JSON_HEDLEY_UNAVAILABLE) + #undef JSON_HEDLEY_UNAVAILABLE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else + #define JSON_HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ + #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else + #define JSON_HEDLEY_WARN_UNUSED_RESULT + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(JSON_HEDLEY_SENTINEL) + #undef JSON_HEDLEY_SENTINEL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else + #define JSON_HEDLEY_SENTINEL(position) +#endif + +#if defined(JSON_HEDLEY_NO_RETURN) + #undef JSON_HEDLEY_NO_RETURN +#endif +#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NO_RETURN __noreturn +#elif \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + #define JSON_HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) + #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#else + #define JSON_HEDLEY_NO_RETURN +#endif + +#if defined(JSON_HEDLEY_NO_ESCAPE) + #undef JSON_HEDLEY_NO_ESCAPE +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) + #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else + #define JSON_HEDLEY_NO_ESCAPE +#endif + +#if defined(JSON_HEDLEY_UNREACHABLE) + #undef JSON_HEDLEY_UNREACHABLE +#endif +#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) + #undef JSON_HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(JSON_HEDLEY_ASSUME) + #undef JSON_HEDLEY_ASSUME +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_ASSUME(expr) __assume(expr) +#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) + #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #if defined(__cplusplus) + #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) + #else + #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) + #endif +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(JSON_HEDLEY_ASSUME) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif +#if !defined(JSON_HEDLEY_ASSUME) + #if defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (JSON_HEDLEY_UNREACHABLE(), 1))) + #else + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) + #endif +#endif +#if defined(JSON_HEDLEY_UNREACHABLE) + #if \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) + #else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() + #endif +#else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif + +JSON_HEDLEY_DIAGNOSTIC_PUSH +#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") + #pragma clang diagnostic ignored "-Wpedantic" +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) + #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) + #if defined(__clang__) + #pragma clang diagnostic ignored "-Wvariadic-macros" + #elif defined(JSON_HEDLEY_GCC_VERSION) + #pragma GCC diagnostic ignored "-Wvariadic-macros" + #endif +#endif +#if defined(JSON_HEDLEY_NON_NULL) + #undef JSON_HEDLEY_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else + #define JSON_HEDLEY_NON_NULL(...) +#endif +JSON_HEDLEY_DIAGNOSTIC_POP + +#if defined(JSON_HEDLEY_PRINTF_FORMAT) + #undef JSON_HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(JSON_HEDLEY_CONSTEXPR) + #undef JSON_HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) + #endif +#endif +#if !defined(JSON_HEDLEY_CONSTEXPR) + #define JSON_HEDLEY_CONSTEXPR +#endif + +#if defined(JSON_HEDLEY_PREDICT) + #undef JSON_HEDLEY_PREDICT +#endif +#if defined(JSON_HEDLEY_LIKELY) + #undef JSON_HEDLEY_LIKELY +#endif +#if defined(JSON_HEDLEY_UNLIKELY) + #undef JSON_HEDLEY_UNLIKELY +#endif +#if defined(JSON_HEDLEY_UNPREDICTABLE) + #undef JSON_HEDLEY_UNPREDICTABLE +#endif +#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) + #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) +# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(JSON_HEDLEY_UNPREDICTABLE) + #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(JSON_HEDLEY_MALLOC) + #undef JSON_HEDLEY_MALLOC +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_MALLOC __declspec(restrict) +#else + #define JSON_HEDLEY_MALLOC +#endif + +#if defined(JSON_HEDLEY_PURE) + #undef JSON_HEDLEY_PURE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PURE __attribute__((__pure__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_CONST) + #undef JSON_HEDLEY_CONST +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_CONST __attribute__((__const__)) +#elif \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_CONST _Pragma("no_side_effect") +#else + #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_RESTRICT) + #undef JSON_HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT restrict +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RESTRICT __restrict +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT _Restrict +#else + #define JSON_HEDLEY_RESTRICT +#endif + +#if defined(JSON_HEDLEY_INLINE) + #undef JSON_HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) + #define JSON_HEDLEY_INLINE inline +#elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) + #define JSON_HEDLEY_INLINE __inline__ +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_INLINE __inline +#else + #define JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_ALWAYS_INLINE) + #undef JSON_HEDLEY_ALWAYS_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_NEVER_INLINE) + #undef JSON_HEDLEY_NEVER_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#else + #define JSON_HEDLEY_NEVER_INLINE +#endif + +#if defined(JSON_HEDLEY_PRIVATE) + #undef JSON_HEDLEY_PRIVATE +#endif +#if defined(JSON_HEDLEY_PUBLIC) + #undef JSON_HEDLEY_PUBLIC +#endif +#if defined(JSON_HEDLEY_IMPORT) + #undef JSON_HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC __declspec(dllexport) +# define JSON_HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC +# endif +# define JSON_HEDLEY_IMPORT extern +#endif + +#if defined(JSON_HEDLEY_NO_THROW) + #undef JSON_HEDLEY_NO_THROW +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NO_THROW __declspec(nothrow) +#else + #define JSON_HEDLEY_NO_THROW +#endif + +#if defined(JSON_HEDLEY_FALL_THROUGH) + #undef JSON_HEDLEY_FALL_THROUGH +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ + #define JSON_HEDLEY_FALL_THROUGH __fallthrough +#else + #define JSON_HEDLEY_FALL_THROUGH +#endif + +#if defined(JSON_HEDLEY_RETURNS_NON_NULL) + #undef JSON_HEDLEY_RETURNS_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ + #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else + #define JSON_HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(JSON_HEDLEY_ARRAY_PARAM) + #undef JSON_HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_ARRAY_PARAM(name) (name) +#else + #define JSON_HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(JSON_HEDLEY_IS_CONSTANT) + #undef JSON_HEDLEY_IS_CONSTANT +#endif +#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) + #undef JSON_HEDLEY_REQUIRE_CONSTEXPR +#endif +/* JSON_HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #undef JSON_HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +#else + #include + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +#endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION)) || \ + (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#else + #include + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +#endif +# elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + defined(JSON_HEDLEY_INTEL_VERSION) || \ + defined(JSON_HEDLEY_TINYC_VERSION) || \ + defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ + defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ + defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ + defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ +((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1)) +#else + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) (0) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(JSON_HEDLEY_BEGIN_C_DECLS) + #undef JSON_HEDLEY_BEGIN_C_DECLS +#endif +#if defined(JSON_HEDLEY_END_C_DECLS) + #undef JSON_HEDLEY_END_C_DECLS +#endif +#if defined(JSON_HEDLEY_C_DECL) + #undef JSON_HEDLEY_C_DECL +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { + #define JSON_HEDLEY_END_C_DECLS } + #define JSON_HEDLEY_C_DECL extern "C" +#else + #define JSON_HEDLEY_BEGIN_C_DECLS + #define JSON_HEDLEY_END_C_DECLS + #define JSON_HEDLEY_C_DECL +#endif + +#if defined(JSON_HEDLEY_STATIC_ASSERT) + #undef JSON_HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(JSON_HEDLEY_NULL) + #undef JSON_HEDLEY_NULL +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) + #elif defined(NULL) + #define JSON_HEDLEY_NULL NULL + #else + #define JSON_HEDLEY_NULL JSON_HEDLEY_STATIC_CAST(void*, 0) + #endif +#elif defined(NULL) + #define JSON_HEDLEY_NULL NULL +#else + #define JSON_HEDLEY_NULL ((void*) 0) +#endif + +#if defined(JSON_HEDLEY_MESSAGE) + #undef JSON_HEDLEY_MESSAGE +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_MESSAGE(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(message msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) +#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_WARNING) + #undef JSON_HEDLEY_WARNING +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_WARNING(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(clang warning msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_REQUIRE) + #undef JSON_HEDLEY_REQUIRE +#endif +#if defined(JSON_HEDLEY_REQUIRE_MSG) + #undef JSON_HEDLEY_REQUIRE_MSG +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") +# define JSON_HEDLEY_REQUIRE(expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), #expr, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define JSON_HEDLEY_REQUIRE(expr) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(JSON_HEDLEY_FLAGS) + #undef JSON_HEDLEY_FLAGS +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) + #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else + #define JSON_HEDLEY_FLAGS +#endif + +#if defined(JSON_HEDLEY_FLAGS_CAST) + #undef JSON_HEDLEY_FLAGS_CAST +#endif +#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(JSON_HEDLEY_EMPTY_BASES) + #undef JSON_HEDLEY_EMPTY_BASES +#endif +#if \ + (JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else + #define JSON_HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) + #undef JSON_HEDLEY_CLANG_HAS_BUILTIN +#endif +#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) + +#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) + #undef JSON_HEDLEY_CLANG_HAS_FEATURE +#endif +#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) + +#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) + #undef JSON_HEDLEY_CLANG_HAS_EXTENSION +#endif +#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) + +#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) + #undef JSON_HEDLEY_CLANG_HAS_WARNING +#endif +#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ + + +// This file contains all internal macro definitions (except those affecting ABI) +// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them + +// #include + + +// exclude unsupported compilers +#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) + #if defined(__clang__) + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 + #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 + #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #endif +#endif + +// C++ language standard detection +// if the user manually specified the used c++ version this is skipped +#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) + #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) + #define JSON_HAS_CPP_20 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #define JSON_HAS_CPP_14 + #endif + // the cpp 11 flag is always specified because it is the minimal required version + #define JSON_HAS_CPP_11 +#endif + +#ifdef __has_include + #if __has_include() + #include + #endif +#endif + +#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM) + #ifdef JSON_HAS_CPP_17 + #if defined(__cpp_lib_filesystem) + #define JSON_HAS_FILESYSTEM 1 + #elif defined(__cpp_lib_experimental_filesystem) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif !defined(__has_include) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif __has_include() + #define JSON_HAS_FILESYSTEM 1 + #elif __has_include() + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #endif + + // std::filesystem does not work on MinGW GCC 8: https://sourceforge.net/p/mingw-w64/bugs/737/ + #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__clang_major__) && __clang_major__ < 7 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support + #if defined(_MSC_VER) && _MSC_VER < 1914 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before iOS 13 + #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before macOS Catalina + #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + #endif +#endif + +#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_FILESYSTEM + #define JSON_HAS_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_THREE_WAY_COMPARISON + #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \ + && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L + #define JSON_HAS_THREE_WAY_COMPARISON 1 + #else + #define JSON_HAS_THREE_WAY_COMPARISON 0 + #endif +#endif + +#ifndef JSON_HAS_RANGES + // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error + #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427 + #define JSON_HAS_RANGES 0 + #elif defined(__cpp_lib_ranges) + #define JSON_HAS_RANGES 1 + #else + #define JSON_HAS_RANGES 0 + #endif +#endif + +#ifdef JSON_HAS_CPP_17 + #define JSON_INLINE_VARIABLE inline +#else + #define JSON_INLINE_VARIABLE +#endif + +#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address) + #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]] +#else + #define JSON_NO_UNIQUE_ADDRESS +#endif + +// disable documentation warnings on clang +#if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wdocumentation" + #pragma clang diagnostic ignored "-Wdocumentation-unknown-command" +#endif + +// allow disabling exceptions +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) + #define JSON_THROW(exception) throw exception + #define JSON_TRY try + #define JSON_CATCH(exception) catch(exception) + #define JSON_INTERNAL_CATCH(exception) catch(exception) +#else + #include + #define JSON_THROW(exception) std::abort() + #define JSON_TRY if(true) + #define JSON_CATCH(exception) if(false) + #define JSON_INTERNAL_CATCH(exception) if(false) +#endif + +// override exception macros +#if defined(JSON_THROW_USER) + #undef JSON_THROW + #define JSON_THROW JSON_THROW_USER +#endif +#if defined(JSON_TRY_USER) + #undef JSON_TRY + #define JSON_TRY JSON_TRY_USER +#endif +#if defined(JSON_CATCH_USER) + #undef JSON_CATCH + #define JSON_CATCH JSON_CATCH_USER + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_CATCH_USER +#endif +#if defined(JSON_INTERNAL_CATCH_USER) + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER +#endif + +// allow overriding assert +#if !defined(JSON_ASSERT) + #include // assert + #define JSON_ASSERT(x) assert(x) +#endif + +// allow to access some private functions (needed by the test suite) +#if defined(JSON_TESTS_PRIVATE) + #define JSON_PRIVATE_UNLESS_TESTED public +#else + #define JSON_PRIVATE_UNLESS_TESTED private +#endif + +/*! +@brief macro to briefly define a mapping between an enum and JSON +@def NLOHMANN_JSON_SERIALIZE_ENUM +@since version 3.4.0 +*/ +#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ + template \ + inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [e](const std::pair& ej_pair) -> bool \ + { \ + return ej_pair.first == e; \ + }); \ + j = ((it != std::end(m)) ? it : std::begin(m))->second; \ + } \ + template \ + inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [&j](const std::pair& ej_pair) -> bool \ + { \ + return ej_pair.second == j; \ + }); \ + e = ((it != std::end(m)) ? it : std::begin(m))->first; \ + } + +// Ugly macros to avoid uglier copy-paste when specializing basic_json. They +// may be removed in the future once the class is split. + +#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ + template class ObjectType, \ + template class ArrayType, \ + class StringType, class BooleanType, class NumberIntegerType, \ + class NumberUnsignedType, class NumberFloatType, \ + template class AllocatorType, \ + template class JSONSerializer, \ + class BinaryType> + +#define NLOHMANN_BASIC_JSON_TPL \ + basic_json + +// Macros to simplify conversion from/to types + +#define NLOHMANN_JSON_EXPAND( x ) x +#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME +#define NLOHMANN_JSON_PASTE(...) NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ + NLOHMANN_JSON_PASTE64, \ + NLOHMANN_JSON_PASTE63, \ + NLOHMANN_JSON_PASTE62, \ + NLOHMANN_JSON_PASTE61, \ + NLOHMANN_JSON_PASTE60, \ + NLOHMANN_JSON_PASTE59, \ + NLOHMANN_JSON_PASTE58, \ + NLOHMANN_JSON_PASTE57, \ + NLOHMANN_JSON_PASTE56, \ + NLOHMANN_JSON_PASTE55, \ + NLOHMANN_JSON_PASTE54, \ + NLOHMANN_JSON_PASTE53, \ + NLOHMANN_JSON_PASTE52, \ + NLOHMANN_JSON_PASTE51, \ + NLOHMANN_JSON_PASTE50, \ + NLOHMANN_JSON_PASTE49, \ + NLOHMANN_JSON_PASTE48, \ + NLOHMANN_JSON_PASTE47, \ + NLOHMANN_JSON_PASTE46, \ + NLOHMANN_JSON_PASTE45, \ + NLOHMANN_JSON_PASTE44, \ + NLOHMANN_JSON_PASTE43, \ + NLOHMANN_JSON_PASTE42, \ + NLOHMANN_JSON_PASTE41, \ + NLOHMANN_JSON_PASTE40, \ + NLOHMANN_JSON_PASTE39, \ + NLOHMANN_JSON_PASTE38, \ + NLOHMANN_JSON_PASTE37, \ + NLOHMANN_JSON_PASTE36, \ + NLOHMANN_JSON_PASTE35, \ + NLOHMANN_JSON_PASTE34, \ + NLOHMANN_JSON_PASTE33, \ + NLOHMANN_JSON_PASTE32, \ + NLOHMANN_JSON_PASTE31, \ + NLOHMANN_JSON_PASTE30, \ + NLOHMANN_JSON_PASTE29, \ + NLOHMANN_JSON_PASTE28, \ + NLOHMANN_JSON_PASTE27, \ + NLOHMANN_JSON_PASTE26, \ + NLOHMANN_JSON_PASTE25, \ + NLOHMANN_JSON_PASTE24, \ + NLOHMANN_JSON_PASTE23, \ + NLOHMANN_JSON_PASTE22, \ + NLOHMANN_JSON_PASTE21, \ + NLOHMANN_JSON_PASTE20, \ + NLOHMANN_JSON_PASTE19, \ + NLOHMANN_JSON_PASTE18, \ + NLOHMANN_JSON_PASTE17, \ + NLOHMANN_JSON_PASTE16, \ + NLOHMANN_JSON_PASTE15, \ + NLOHMANN_JSON_PASTE14, \ + NLOHMANN_JSON_PASTE13, \ + NLOHMANN_JSON_PASTE12, \ + NLOHMANN_JSON_PASTE11, \ + NLOHMANN_JSON_PASTE10, \ + NLOHMANN_JSON_PASTE9, \ + NLOHMANN_JSON_PASTE8, \ + NLOHMANN_JSON_PASTE7, \ + NLOHMANN_JSON_PASTE6, \ + NLOHMANN_JSON_PASTE5, \ + NLOHMANN_JSON_PASTE4, \ + NLOHMANN_JSON_PASTE3, \ + NLOHMANN_JSON_PASTE2, \ + NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) +#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) +#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) +#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) +#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) +#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) +#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) +#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) +#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) +#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) +#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) +#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) +#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) +#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) +#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) +#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) +#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) +#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) +#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) +#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) +#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) +#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) +#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) +#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) +#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) +#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) +#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) +#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) +#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) +#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) +#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) +#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) +#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) +#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) +#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) +#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) +#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) +#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) +#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) +#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) +#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) +#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) +#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) +#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) +#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) +#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) +#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) +#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) +#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) +#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) +#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) +#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) +#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) +#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) +#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) +#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) +#define NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) +#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) +#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) +#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) +#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) +#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) +#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) +#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) + +#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; +#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); +#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { Type nlohmann_json_default_obj; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { Type nlohmann_json_default_obj; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + + +// inspired from https://stackoverflow.com/a/26745591 +// allows to call any std function as if (e.g. with begin): +// using std::begin; begin(x); +// +// it allows using the detected idiom to retrieve the return type +// of such an expression +#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name) \ + namespace detail { \ + using std::std_name; \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + } \ + \ + namespace detail2 { \ + struct std_name##_tag \ + { \ + }; \ + \ + template \ + std_name##_tag std_name(T&&...); \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + \ + template \ + struct would_call_std_##std_name \ + { \ + static constexpr auto const value = ::nlohmann::detail:: \ + is_detected_exact::value; \ + }; \ + } /* namespace detail2 */ \ + \ + template \ + struct would_call_std_##std_name : detail2::would_call_std_##std_name \ + { \ + } + +#ifndef JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_USE_IMPLICIT_CONVERSIONS 1 +#endif + +#if JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_EXPLICIT +#else + #define JSON_EXPLICIT explicit +#endif + +#ifndef JSON_DISABLE_ENUM_SERIALIZATION + #define JSON_DISABLE_ENUM_SERIALIZATION 0 +#endif + +#ifndef JSON_USE_GLOBAL_UDLS + #define JSON_USE_GLOBAL_UDLS 1 +#endif + +#if JSON_HAS_THREE_WAY_COMPARISON + #include // partial_ordering +#endif + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/////////////////////////// +// JSON type enumeration // +/////////////////////////// + +/*! +@brief the JSON type enumeration + +This enumeration collects the different JSON types. It is internally used to +distinguish the stored values, and the functions @ref basic_json::is_null(), +@ref basic_json::is_object(), @ref basic_json::is_array(), +@ref basic_json::is_string(), @ref basic_json::is_boolean(), +@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), +@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), +@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and +@ref basic_json::is_structured() rely on it. + +@note There are three enumeration entries (number_integer, number_unsigned, and +number_float), because the library distinguishes these three types for numbers: +@ref basic_json::number_unsigned_t is used for unsigned integers, +@ref basic_json::number_integer_t is used for signed integers, and +@ref basic_json::number_float_t is used for floating-point numbers or to +approximate integers which do not fit in the limits of their respective type. + +@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON +value with the default value for a given type + +@since version 1.0.0 +*/ +enum class value_t : std::uint8_t +{ + null, ///< null value + object, ///< object (unordered set of name/value pairs) + array, ///< array (ordered collection of values) + string, ///< string value + boolean, ///< boolean value + number_integer, ///< number value (signed integer) + number_unsigned, ///< number value (unsigned integer) + number_float, ///< number value (floating-point) + binary, ///< binary array (ordered collection of bytes) + discarded ///< discarded by the parser callback function +}; + +/*! +@brief comparison operator for JSON types + +Returns an ordering that is similar to Python: +- order: null < boolean < number < object < array < string < binary +- furthermore, each type is not smaller than itself +- discarded values are not comparable +- binary is represented as a b"" string in python and directly comparable to a + string; however, making a binary array directly comparable with a string would + be surprising behavior in a JSON file. + +@since version 1.0.0 +*/ +#if JSON_HAS_THREE_WAY_COMPARISON + inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD* +#else + inline bool operator<(const value_t lhs, const value_t rhs) noexcept +#endif +{ + static constexpr std::array order = {{ + 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, + 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, + 6 /* binary */ + } + }; + + const auto l_index = static_cast(lhs); + const auto r_index = static_cast(rhs); +#if JSON_HAS_THREE_WAY_COMPARISON + if (l_index < order.size() && r_index < order.size()) + { + return order[l_index] <=> order[r_index]; // *NOPAD* + } + return std::partial_ordering::unordered; +#else + return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index]; +#endif +} + +// GCC selects the built-in operator< over an operator rewritten from +// a user-defined spaceship operator +// Clang, MSVC, and ICC select the rewritten candidate +// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200) +#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__) +inline bool operator<(const value_t lhs, const value_t rhs) noexcept +{ + return std::is_lt(lhs <=> rhs); // *NOPAD* +} +#endif + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/*! +@brief replace all occurrences of a substring by another string + +@param[in,out] s the string to manipulate; changed so that all + occurrences of @a f are replaced with @a t +@param[in] f the substring to replace with @a t +@param[in] t the string to replace @a f + +@pre The search string @a f must not be empty. **This precondition is +enforced with an assertion.** + +@since version 2.0.0 +*/ +template +inline void replace_substring(StringType& s, const StringType& f, + const StringType& t) +{ + JSON_ASSERT(!f.empty()); + for (auto pos = s.find(f); // find first occurrence of f + pos != StringType::npos; // make sure f was found + s.replace(pos, f.size(), t), // replace with t, and + pos = s.find(f, pos + t.size())) // find next occurrence of f + {} +} + +/*! + * @brief string escaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to escape + * @return escaped string + * + * Note the order of escaping "~" to "~0" and "/" to "~1" is important. + */ +template +inline StringType escape(StringType s) +{ + replace_substring(s, StringType{"~"}, StringType{"~0"}); + replace_substring(s, StringType{"/"}, StringType{"~1"}); + return s; +} + +/*! + * @brief string unescaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to unescape + * @return unescaped string + * + * Note the order of escaping "~1" to "/" and "~0" to "~" is important. + */ +template +static void unescape(StringType& s) +{ + replace_substring(s, StringType{"~1"}, StringType{"/"}); + replace_substring(s, StringType{"~0"}, StringType{"~"}); +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // size_t + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/// struct to capture the start position of the current token +struct position_t +{ + /// the total number of characters read + std::size_t chars_read_total = 0; + /// the number of characters read in the current line + std::size_t chars_read_current_line = 0; + /// the number of lines read + std::size_t lines_read = 0; + + /// conversion to size_t to preserve SAX interface + constexpr operator size_t() const + { + return chars_read_total; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-FileCopyrightText: 2018 The Abseil Authors +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type +#include // index_sequence, make_index_sequence, index_sequence_for + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +using uncvref_t = typename std::remove_cv::type>::type; + +#ifdef JSON_HAS_CPP_14 + +// the following utilities are natively available in C++14 +using std::enable_if_t; +using std::index_sequence; +using std::make_index_sequence; +using std::index_sequence_for; + +#else + +// alias templates to reduce boilerplate +template +using enable_if_t = typename std::enable_if::type; + +// The following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h +// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. + +//// START OF CODE FROM GOOGLE ABSEIL + +// integer_sequence +// +// Class template representing a compile-time integer sequence. An instantiation +// of `integer_sequence` has a sequence of integers encoded in its +// type through its template arguments (which is a common need when +// working with C++11 variadic templates). `absl::integer_sequence` is designed +// to be a drop-in replacement for C++14's `std::integer_sequence`. +// +// Example: +// +// template< class T, T... Ints > +// void user_function(integer_sequence); +// +// int main() +// { +// // user_function's `T` will be deduced to `int` and `Ints...` +// // will be deduced to `0, 1, 2, 3, 4`. +// user_function(make_integer_sequence()); +// } +template +struct integer_sequence +{ + using value_type = T; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +// index_sequence +// +// A helper template for an `integer_sequence` of `size_t`, +// `absl::index_sequence` is designed to be a drop-in replacement for C++14's +// `std::index_sequence`. +template +using index_sequence = integer_sequence; + +namespace utility_internal +{ + +template +struct Extend; + +// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency. +template +struct Extend, SeqSize, 0> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >; +}; + +template +struct Extend, SeqSize, 1> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >; +}; + +// Recursion helper for 'make_integer_sequence'. +// 'Gen::type' is an alias for 'integer_sequence'. +template +struct Gen +{ + using type = + typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type; +}; + +template +struct Gen +{ + using type = integer_sequence; +}; + +} // namespace utility_internal + +// Compile-time sequences of integers + +// make_integer_sequence +// +// This template alias is equivalent to +// `integer_sequence`, and is designed to be a drop-in +// replacement for C++14's `std::make_integer_sequence`. +template +using make_integer_sequence = typename utility_internal::Gen::type; + +// make_index_sequence +// +// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`, +// and is designed to be a drop-in replacement for C++14's +// `std::make_index_sequence`. +template +using make_index_sequence = make_integer_sequence; + +// index_sequence_for +// +// Converts a typename pack into an index sequence of the same length, and +// is designed to be a drop-in replacement for C++14's +// `std::index_sequence_for()` +template +using index_sequence_for = make_index_sequence; + +//// END OF CODE FROM GOOGLE ABSEIL + +#endif + +// dispatch utility (taken from ranges-v3) +template struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + +// taken from ranges-v3 +template +struct static_const +{ + static JSON_INLINE_VARIABLE constexpr T value{}; +}; + +#ifndef JSON_HAS_CPP_17 + template + constexpr T static_const::value; +#endif + +template +inline constexpr std::array make_array(Args&& ... args) +{ + return std::array {{static_cast(std::forward(args))...}}; +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // numeric_limits +#include // false_type, is_constructible, is_integral, is_same, true_type +#include // declval +#include // tuple + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // random_access_iterator_tag + +// #include + +// #include + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +struct iterator_types {}; + +template +struct iterator_types < + It, + void_t> +{ + using difference_type = typename It::difference_type; + using value_type = typename It::value_type; + using pointer = typename It::pointer; + using reference = typename It::reference; + using iterator_category = typename It::iterator_category; +}; + +// This is required as some compilers implement std::iterator_traits in a way that +// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. +template +struct iterator_traits +{ +}; + +template +struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> + : iterator_types +{ +}; + +template +struct iterator_traits::value>> +{ + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin); + +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end); + +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + +#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ + #define INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + #include // int64_t, uint64_t + #include // map + #include // allocator + #include // string + #include // vector + + // #include + + + /*! + @brief namespace for Niels Lohmann + @see https://github.com/nlohmann + @since version 1.0.0 + */ + NLOHMANN_JSON_NAMESPACE_BEGIN + + /*! + @brief default JSONSerializer template argument + + This serializer ignores the template arguments and uses ADL + ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) + for serialization. + */ + template + struct adl_serializer; + + /// a class to store JSON values + /// @sa https://json.nlohmann.me/api/basic_json/ + template class ObjectType = + std::map, + template class ArrayType = std::vector, + class StringType = std::string, class BooleanType = bool, + class NumberIntegerType = std::int64_t, + class NumberUnsignedType = std::uint64_t, + class NumberFloatType = double, + template class AllocatorType = std::allocator, + template class JSONSerializer = + adl_serializer, + class BinaryType = std::vector> + class basic_json; + + /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document + /// @sa https://json.nlohmann.me/api/json_pointer/ + template + class json_pointer; + + /*! + @brief default specialization + @sa https://json.nlohmann.me/api/json/ + */ + using json = basic_json<>; + + /// @brief a minimal map-like container that preserves insertion order + /// @sa https://json.nlohmann.me/api/ordered_map/ + template + struct ordered_map; + + /// @brief specialization that maintains the insertion order of object keys + /// @sa https://json.nlohmann.me/api/ordered_json/ + using ordered_json = basic_json; + + NLOHMANN_JSON_NAMESPACE_END + +#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + +NLOHMANN_JSON_NAMESPACE_BEGIN +/*! +@brief detail namespace with internal helper functions + +This namespace collects functions that should not be exposed, +implementations of some @ref basic_json methods, and meta-programming helpers. + +@since version 2.1.0 +*/ +namespace detail +{ + +///////////// +// helpers // +///////////// + +// Note to maintainers: +// +// Every trait in this file expects a non CV-qualified type. +// The only exceptions are in the 'aliases for detected' section +// (i.e. those of the form: decltype(T::member_function(std::declval()))) +// +// In this case, T has to be properly CV-qualified to constraint the function arguments +// (e.g. to_json(BasicJsonType&, const T&)) + +template struct is_basic_json : std::false_type {}; + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +struct is_basic_json : std::true_type {}; + +// used by exceptions create() member functions +// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t +// false_type otherwise +template +struct is_basic_json_context : + std::integral_constant < bool, + is_basic_json::type>::type>::value + || std::is_same::value > +{}; + +////////////////////// +// json_ref helpers // +////////////////////// + +template +class json_ref; + +template +struct is_json_ref : std::false_type {}; + +template +struct is_json_ref> : std::true_type {}; + +////////////////////////// +// aliases for detected // +////////////////////////// + +template +using mapped_type_t = typename T::mapped_type; + +template +using key_type_t = typename T::key_type; + +template +using value_type_t = typename T::value_type; + +template +using difference_type_t = typename T::difference_type; + +template +using pointer_t = typename T::pointer; + +template +using reference_t = typename T::reference; + +template +using iterator_category_t = typename T::iterator_category; + +template +using to_json_function = decltype(T::to_json(std::declval()...)); + +template +using from_json_function = decltype(T::from_json(std::declval()...)); + +template +using get_template_function = decltype(std::declval().template get()); + +// trait checking if JSONSerializer::from_json(json const&, udt&) exists +template +struct has_from_json : std::false_type {}; + +// trait checking if j.get is valid +// use this trait instead of std::is_constructible or std::is_convertible, +// both rely on, or make use of implicit conversions, and thus fail when T +// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958) +template +struct is_getable +{ + static constexpr bool value = is_detected::value; +}; + +template +struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +// This trait checks if JSONSerializer::from_json(json const&) exists +// this overload is used for non-default-constructible user-defined-types +template +struct has_non_default_from_json : std::false_type {}; + +template +struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +// This trait checks if BasicJsonType::json_serializer::to_json exists +// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. +template +struct has_to_json : std::false_type {}; + +template +struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +template +using detect_key_compare = typename T::key_compare; + +template +struct has_key_compare : std::integral_constant::value> {}; + +// obtains the actual object key comparator +template +struct actual_object_comparator +{ + using object_t = typename BasicJsonType::object_t; + using object_comparator_t = typename BasicJsonType::default_object_comparator_t; + using type = typename std::conditional < has_key_compare::value, + typename object_t::key_compare, object_comparator_t>::type; +}; + +template +using actual_object_comparator_t = typename actual_object_comparator::type; + +/////////////////// +// is_ functions // +/////////////////// + +// https://en.cppreference.com/w/cpp/types/conjunction +template struct conjunction : std::true_type { }; +template struct conjunction : B { }; +template +struct conjunction +: std::conditional(B::value), conjunction, B>::type {}; + +// https://en.cppreference.com/w/cpp/types/negation +template struct negation : std::integral_constant < bool, !B::value > { }; + +// Reimplementation of is_constructible and is_default_constructible, due to them being broken for +// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367). +// This causes compile errors in e.g. clang 3.5 or gcc 4.9. +template +struct is_default_constructible : std::is_default_constructible {}; + +template +struct is_default_constructible> + : conjunction, is_default_constructible> {}; + +template +struct is_default_constructible> + : conjunction, is_default_constructible> {}; + +template +struct is_default_constructible> + : conjunction...> {}; + +template +struct is_default_constructible> + : conjunction...> {}; + + +template +struct is_constructible : std::is_constructible {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + + +template +struct is_iterator_traits : std::false_type {}; + +template +struct is_iterator_traits> +{ + private: + using traits = iterator_traits; + + public: + static constexpr auto value = + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value; +}; + +template +struct is_range +{ + private: + using t_ref = typename std::add_lvalue_reference::type; + + using iterator = detected_t; + using sentinel = detected_t; + + // to be 100% correct, it should use https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator + // and https://en.cppreference.com/w/cpp/iterator/sentinel_for + // but reimplementing these would be too much work, as a lot of other concepts are used underneath + static constexpr auto is_iterator_begin = + is_iterator_traits>::value; + + public: + static constexpr bool value = !std::is_same::value && !std::is_same::value && is_iterator_begin; +}; + +template +using iterator_t = enable_if_t::value, result_of_begin())>>; + +template +using range_value_t = value_type_t>>; + +// The following implementation of is_complete_type is taken from +// https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/ +// and is written by Xiang Fan who agreed to using it in this library. + +template +struct is_complete_type : std::false_type {}; + +template +struct is_complete_type : std::true_type {}; + +template +struct is_compatible_object_type_impl : std::false_type {}; + +template +struct is_compatible_object_type_impl < + BasicJsonType, CompatibleObjectType, + enable_if_t < is_detected::value&& + is_detected::value >> +{ + using object_t = typename BasicJsonType::object_t; + + // macOS's is_constructible does not play well with nonesuch... + static constexpr bool value = + is_constructible::value && + is_constructible::value; +}; + +template +struct is_compatible_object_type + : is_compatible_object_type_impl {}; + +template +struct is_constructible_object_type_impl : std::false_type {}; + +template +struct is_constructible_object_type_impl < + BasicJsonType, ConstructibleObjectType, + enable_if_t < is_detected::value&& + is_detected::value >> +{ + using object_t = typename BasicJsonType::object_t; + + static constexpr bool value = + (is_default_constructible::value && + (std::is_move_assignable::value || + std::is_copy_assignable::value) && + (is_constructible::value && + std::is_same < + typename object_t::mapped_type, + typename ConstructibleObjectType::mapped_type >::value)) || + (has_from_json::value || + has_non_default_from_json < + BasicJsonType, + typename ConstructibleObjectType::mapped_type >::value); +}; + +template +struct is_constructible_object_type + : is_constructible_object_type_impl {}; + +template +struct is_compatible_string_type +{ + static constexpr auto value = + is_constructible::value; +}; + +template +struct is_constructible_string_type +{ + // launder type through decltype() to fix compilation failure on ICPC +#ifdef __INTEL_COMPILER + using laundered_type = decltype(std::declval()); +#else + using laundered_type = ConstructibleStringType; +#endif + + static constexpr auto value = + conjunction < + is_constructible, + is_detected_exact>::value; +}; + +template +struct is_compatible_array_type_impl : std::false_type {}; + +template +struct is_compatible_array_type_impl < + BasicJsonType, CompatibleArrayType, + enable_if_t < + is_detected::value&& + is_iterator_traits>>::value&& +// special case for types like std::filesystem::path whose iterator's value_type are themselves +// c.f. https://github.com/nlohmann/json/pull/3073 + !std::is_same>::value >> +{ + static constexpr bool value = + is_constructible>::value; +}; + +template +struct is_compatible_array_type + : is_compatible_array_type_impl {}; + +template +struct is_constructible_array_type_impl : std::false_type {}; + +template +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t::value >> + : std::true_type {}; + +template +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t < !std::is_same::value&& + !is_compatible_string_type::value&& + is_default_constructible::value&& +(std::is_move_assignable::value || + std::is_copy_assignable::value)&& +is_detected::value&& +is_iterator_traits>>::value&& +is_detected::value&& +// special case for types like std::filesystem::path whose iterator's value_type are themselves +// c.f. https://github.com/nlohmann/json/pull/3073 +!std::is_same>::value&& + is_complete_type < + detected_t>::value >> +{ + using value_type = range_value_t; + + static constexpr bool value = + std::is_same::value || + has_from_json::value || + has_non_default_from_json < + BasicJsonType, + value_type >::value; +}; + +template +struct is_constructible_array_type + : is_constructible_array_type_impl {}; + +template +struct is_compatible_integer_type_impl : std::false_type {}; + +template +struct is_compatible_integer_type_impl < + RealIntegerType, CompatibleNumberIntegerType, + enable_if_t < std::is_integral::value&& + std::is_integral::value&& + !std::is_same::value >> +{ + // is there an assert somewhere on overflows? + using RealLimits = std::numeric_limits; + using CompatibleLimits = std::numeric_limits; + + static constexpr auto value = + is_constructible::value && + CompatibleLimits::is_integer && + RealLimits::is_signed == CompatibleLimits::is_signed; +}; + +template +struct is_compatible_integer_type + : is_compatible_integer_type_impl {}; + +template +struct is_compatible_type_impl: std::false_type {}; + +template +struct is_compatible_type_impl < + BasicJsonType, CompatibleType, + enable_if_t::value >> +{ + static constexpr bool value = + has_to_json::value; +}; + +template +struct is_compatible_type + : is_compatible_type_impl {}; + +template +struct is_constructible_tuple : std::false_type {}; + +template +struct is_constructible_tuple> : conjunction...> {}; + +template +struct is_json_iterator_of : std::false_type {}; + +template +struct is_json_iterator_of : std::true_type {}; + +template +struct is_json_iterator_of : std::true_type +{}; + +// checks if a given type T is a template specialization of Primary +template