Skip to content

Commit c3b7922

Browse files
committed
Merge remote-tracking branch 'origin/master' into GraniteFour
* origin/master: (27 commits) llama : fix build_ffn without gate (ggml-org#13336) CUDA: fix bad asserts for partial offload (ggml-org#13337) convert : qwen2/3moe : set yarn metadata if present (ggml-org#13331) CUDA: fix --split-mode row for MMQ (ggml-org#13323) gguf-py : avoid requiring pyside6 for other scripts (ggml-org#13036) CUDA: fix logic for clearing padding with -ngl 0 (ggml-org#13320) sampling : Integrate Top-nσ into main sampling chain (and add it to the server) (ggml-org#13264) server : Webui - change setText command from parent window to also send the message. (ggml-org#13309) mtmd : rename llava directory to mtmd (ggml-org#13311) clip : fix confused naming ffn_up and ffn_down (ggml-org#13290) convert : bailingmoe : set yarn metadata if present (ggml-org#13312) SYCL: Disable mul_mat kernels for noncontiguous tensor b (ggml-org#13308) mtmd : add C public API (ggml-org#13184) rpc : use backend registry, support dl backends (ggml-org#13304) ggml : activate s390x simd for Q3_K (ggml-org#13301) llava/mtmd : fixes to fully support dl backends (ggml-org#13303) llama : build windows releases with dl backends (ggml-org#13220) CUDA: fix race condition in MMQ stream-k fixup (ggml-org#13299) CUDA: fix race condition in MMQ ids_dst (ggml-org#13294) vulkan: Additional type support for unary, binary, and copy (ggml-org#13266) ...
2 parents c82b240 + 2f54e34 commit c3b7922

File tree

251 files changed

+2506
-970
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

251 files changed

+2506
-970
lines changed

.editorconfig

+4-4
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,23 @@ indent_style = tab
2121
[prompts/*.txt]
2222
insert_final_newline = unset
2323

24-
[examples/server/public/*]
24+
[tools/server/public/*]
2525
indent_size = 2
2626

27-
[examples/server/public/deps_*]
27+
[tools/server/public/deps_*]
2828
trim_trailing_whitespace = unset
2929
indent_style = unset
3030
indent_size = unset
3131

32-
[examples/server/deps_*]
32+
[tools/server/deps_*]
3333
trim_trailing_whitespace = unset
3434
indent_style = unset
3535
indent_size = unset
3636

3737
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
3838
indent_style = tab
3939

40-
[examples/cvector-generator/*.txt]
40+
[tools/cvector-generator/*.txt]
4141
trim_trailing_whitespace = unset
4242
insert_final_newline = unset
4343

.flake8

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
max-line-length = 125
33
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
44
exclude =
5-
# Do not traverse examples
5+
# Do not traverse examples and tools
66
examples,
7+
tools,
78
# Do not include package initializers
89
__init__.py,
910
# No need to traverse our git directory

.github/labeler.yml

+4-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ build:
4545
- CMakePresets.json
4646
examples:
4747
- changed-files:
48-
- any-glob-to-any-file: examples/**
48+
- any-glob-to-any-file:
49+
- examples/**
50+
- tools/**
4951
devops:
5052
- changed-files:
5153
- any-glob-to-any-file:
@@ -70,7 +72,7 @@ android:
7072
server:
7173
- changed-files:
7274
- any-glob-to-any-file:
73-
- examples/server/**
75+
- tools/server/**
7476
ggml:
7577
- changed-files:
7678
- any-glob-to-any-file:

.github/workflows/bench.yml.disabled

+15-15
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ on:
2727
push:
2828
branches:
2929
- master
30-
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
30+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
3131
pull_request_target:
3232
types: [opened, synchronize, reopened]
33-
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
33+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
3434
schedule:
3535
- cron: '04 2 * * *'
3636

@@ -69,7 +69,7 @@ jobs:
6969
- name: Install python env
7070
id: pipenv
7171
run: |
72-
cd examples/server/bench
72+
cd tools/server/bench
7373
python3 -m venv venv
7474
source venv/bin/activate
7575
pip install -r requirements.txt
@@ -79,7 +79,7 @@ jobs:
7979
run: |
8080
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
8181
tar xzf prometheus*.tar.gz --strip-components=1
82-
./prometheus --config.file=examples/server/bench/prometheus.yml &
82+
./prometheus --config.file=tools/server/bench/prometheus.yml &
8383
while ! nc -z localhost 9090; do
8484
sleep 0.1
8585
done
@@ -92,7 +92,7 @@ jobs:
9292
- name: Install k6 and xk6-sse
9393
id: k6_installation
9494
run: |
95-
cd examples/server/bench
95+
cd tools/server/bench
9696
go install go.k6.io/xk6/cmd/xk6@latest
9797
xk6 build master \
9898
--with github.com/phymbert/xk6-sse
@@ -116,7 +116,7 @@ jobs:
116116
- name: Download the dataset
117117
id: download_dataset
118118
run: |
119-
cd examples/server/bench
119+
cd tools/server/bench
120120
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
121121

122122
- name: Server bench
@@ -126,7 +126,7 @@ jobs:
126126
run: |
127127
set -eux
128128

129-
cd examples/server/bench
129+
cd tools/server/bench
130130
source venv/bin/activate
131131
python bench.py \
132132
--runner-label ${{ env.RUNNER_LABEL }} \
@@ -157,9 +157,9 @@ jobs:
157157
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
158158
compression-level: 9
159159
path: |
160-
examples/server/bench/*.jpg
161-
examples/server/bench/*.json
162-
examples/server/bench/*.log
160+
tools/server/bench/*.jpg
161+
tools/server/bench/*.json
162+
tools/server/bench/*.log
163163

164164
- name: Commit status
165165
uses: Sibz/github-status-action@v1
@@ -178,17 +178,17 @@ jobs:
178178
with:
179179
client_id: ${{secrets.IMGUR_CLIENT_ID}}
180180
path: |
181-
examples/server/bench/prompt_tokens_seconds.jpg
182-
examples/server/bench/predicted_tokens_seconds.jpg
183-
examples/server/bench/kv_cache_usage_ratio.jpg
184-
examples/server/bench/requests_processing.jpg
181+
tools/server/bench/prompt_tokens_seconds.jpg
182+
tools/server/bench/predicted_tokens_seconds.jpg
183+
tools/server/bench/kv_cache_usage_ratio.jpg
184+
tools/server/bench/requests_processing.jpg
185185

186186
- name: Extract mermaid
187187
id: set_mermaid
188188
run: |
189189
set -eux
190190

191-
cd examples/server/bench
191+
cd tools/server/bench
192192
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
193193
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
194194
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV

.github/workflows/build-linux-cross.yml

+3
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ jobs:
3434
cmake -B build -DCMAKE_BUILD_TYPE=Release \
3535
-DGGML_OPENMP=OFF \
3636
-DLLAMA_BUILD_EXAMPLES=ON \
37+
-DLLAMA_BUILD_TOOLS=ON \
3738
-DLLAMA_BUILD_TESTS=OFF \
3839
-DCMAKE_SYSTEM_NAME=Linux \
3940
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -80,6 +81,7 @@ jobs:
8081
-DGGML_VULKAN=ON \
8182
-DGGML_OPENMP=OFF \
8283
-DLLAMA_BUILD_EXAMPLES=ON \
84+
-DLLAMA_BUILD_TOOLS=ON \
8385
-DLLAMA_BUILD_TESTS=OFF \
8486
-DCMAKE_SYSTEM_NAME=Linux \
8587
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -125,6 +127,7 @@ jobs:
125127
-DGGML_VULKAN=ON \
126128
-DGGML_OPENMP=OFF \
127129
-DLLAMA_BUILD_EXAMPLES=ON \
130+
-DLLAMA_BUILD_TOOLS=ON \
128131
-DLLAMA_BUILD_TESTS=OFF \
129132
-DCMAKE_SYSTEM_NAME=Linux \
130133
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \

.github/workflows/build.yml

+31-45
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ jobs:
633633
-DGGML_METAL_EMBED_LIBRARY=ON \
634634
-DLLAMA_BUILD_COMMON=OFF \
635635
-DLLAMA_BUILD_EXAMPLES=OFF \
636+
-DLLAMA_BUILD_TOOLS=OFF \
636637
-DLLAMA_BUILD_TESTS=OFF \
637638
-DLLAMA_BUILD_SERVER=OFF \
638639
-DCMAKE_SYSTEM_NAME=iOS \
@@ -669,6 +670,7 @@ jobs:
669670
-DGGML_METAL_EMBED_LIBRARY=ON \
670671
-DLLAMA_BUILD_COMMON=OFF \
671672
-DLLAMA_BUILD_EXAMPLES=OFF \
673+
-DLLAMA_BUILD_TOOLS=OFF \
672674
-DLLAMA_BUILD_TESTS=OFF \
673675
-DLLAMA_BUILD_SERVER=OFF \
674676
-DCMAKE_SYSTEM_NAME=tvOS \
@@ -699,6 +701,7 @@ jobs:
699701
-DGGML_METAL_EMBED_LIBRARY=ON \
700702
-DLLAMA_BUILD_COMMON=OFF \
701703
-DLLAMA_BUILD_EXAMPLES=OFF \
704+
-DLLAMA_BUILD_TOOLS=OFF \
702705
-DLLAMA_BUILD_TESTS=OFF \
703706
-DLLAMA_BUILD_SERVER=OFF \
704707
-DCMAKE_SYSTEM_NAME=visionOS \
@@ -739,6 +742,7 @@ jobs:
739742
-DGGML_METAL_EMBED_LIBRARY=ON \
740743
-DLLAMA_CURL=OFF \
741744
-DLLAMA_BUILD_EXAMPLES=OFF \
745+
-DLLAMA_BUILD_TOOLS=OFF \
742746
-DLLAMA_BUILD_TESTS=OFF \
743747
-DLLAMA_BUILD_SERVER=OFF \
744748
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
@@ -767,7 +771,7 @@ jobs:
767771
uses: hendrikmuhs/ccache-action@v1.2.16
768772
with:
769773
key: windows-msys2
770-
variant: sccache
774+
variant: ccache
771775
evict-old-files: 1d
772776

773777
- name: Setup ${{ matrix.sys }}
@@ -810,26 +814,18 @@ jobs:
810814
strategy:
811815
matrix:
812816
include:
813-
- build: 'noavx-x64'
814-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
815-
- build: 'avx2-x64'
816-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
817-
- build: 'avx-x64'
818-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
819-
- build: 'avx512-x64'
820-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
817+
- build: 'cpu-x64'
818+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
821819
- build: 'openblas-x64'
822-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
823-
- build: 'kompute-x64'
824-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
820+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
825821
- build: 'vulkan-x64'
826-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
822+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
827823
- build: 'llvm-arm64'
828824
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
829-
- build: 'msvc-arm64'
830-
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
831825
- build: 'llvm-arm64-opencl-adreno'
832826
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
827+
# - build: 'kompute-x64'
828+
# defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
833829

834830
steps:
835831
- name: Clone
@@ -842,7 +838,7 @@ jobs:
842838
uses: hendrikmuhs/ccache-action@v1.2.16
843839
with:
844840
key: windows-latest-cmake-${{ matrix.build }}
845-
variant: sccache
841+
variant: ccache
846842
evict-old-files: 1d
847843

848844
- name: Clone Kompute submodule
@@ -918,39 +914,26 @@ jobs:
918914
cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
919915
cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
920916
921-
- name: Check AVX512F support
922-
id: check_avx512f
923-
if: ${{ matrix.build == 'avx512-x64' }}
924-
continue-on-error: true
925-
run: |
926-
cd build
927-
$vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
928-
$msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
929-
$cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
930-
echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
931-
& $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
932-
.\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
933-
934917
- name: Test
935918
id: cmake_test
936-
# not all machines have native AVX-512
937-
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
919+
if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }}
938920
run: |
939921
cd build
940922
ctest -L main -C Release --verbose --timeout 900
941923
942-
- name: Test (Intel SDE)
943-
id: cmake_test_sde
944-
if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
945-
run: |
946-
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
947-
# for some weird reason windows tar doesn't like sde tar.xz
948-
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
949-
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
950-
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
951-
cd build
952-
$env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
953-
& $sde -future -- ctest -L main -C Release --verbose --timeout 900
924+
# TODO: disabled for now, consider adding tests for all CPU variants instead
925+
# - name: Test (Intel SDE)
926+
# id: cmake_test_sde
927+
# if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
928+
# run: |
929+
# curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
930+
# # for some weird reason windows tar doesn't like sde tar.xz
931+
# 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
932+
# 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
933+
# $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
934+
# cd build
935+
# $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
936+
# & $sde -future -- ctest -L main -C Release --verbose --timeout 900
954937

955938
- name: Determine tag name
956939
id: tag
@@ -1035,7 +1018,7 @@ jobs:
10351018
uses: hendrikmuhs/ccache-action@v1.2.16
10361019
with:
10371020
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
1038-
variant: sccache
1021+
variant: ccache
10391022
evict-old-files: 1d
10401023

10411024
- name: Install Cuda Toolkit 11.7
@@ -1113,6 +1096,8 @@ jobs:
11131096
cmake -S . -B build -G "Ninja Multi-Config" ^
11141097
-DLLAMA_BUILD_SERVER=ON ^
11151098
-DGGML_NATIVE=OFF ^
1099+
-DGGML_BACKEND_DL=ON ^
1100+
-DGGML_CPU_ALL_VARIANTS=ON ^
11161101
-DGGML_CUDA=ON ^
11171102
-DGGML_RPC=ON ^
11181103
-DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include"
@@ -1187,7 +1172,7 @@ jobs:
11871172
uses: hendrikmuhs/ccache-action@v1.2.16
11881173
with:
11891174
key: windows-latest-cmake-sycl
1190-
variant: sccache
1175+
variant: ccache
11911176
evict-old-files: 1d
11921177

11931178
- name: Install
@@ -1417,6 +1402,7 @@ jobs:
14171402
-DGGML_METAL_EMBED_LIBRARY=ON \
14181403
-DLLAMA_CURL=OFF \
14191404
-DLLAMA_BUILD_EXAMPLES=OFF \
1405+
-DLLAMA_BUILD_TOOLS=OFF \
14201406
-DLLAMA_BUILD_TESTS=OFF \
14211407
-DLLAMA_BUILD_SERVER=OFF \
14221408
-DCMAKE_SYSTEM_NAME=iOS \

0 commit comments

Comments
 (0)