Skip to content

Commit 3d92436

Browse files
committed
Merge remote-tracking branch 'upstream/master' into webgpu
2 parents 39d956d + d4e0d95 commit 3d92436

File tree

119 files changed

+26785
-18249
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

119 files changed

+26785
-18249
lines changed

.github/labeler.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,10 @@ nix:
8686
embedding:
8787
- changed-files:
8888
- any-glob-to-any-file: examples/embedding/
89+
90+
Ascend NPU:
91+
- changed-files:
92+
- any-glob-to-any-file:
93+
- ggml/include/ggml-cann.h
94+
- ggml/src/ggml-cann/**
95+
- docs/backend/CANN.md

.github/workflows/build-linux-cross.yml

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,116 @@ jobs:
231231
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
232232
233233
cmake --build build --config Release -j $(nproc)
234+
235+
debian-13-loongarch64-cpu-cross:
236+
runs-on: ubuntu-24.04
237+
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
238+
239+
steps:
240+
- uses: actions/checkout@v4
241+
- name: Setup LoongArch
242+
run: |
243+
rm -f /etc/apt/sources.list.d/*
244+
cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
245+
deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
246+
EOF
247+
( echo 'quiet "true";'; \
248+
echo 'APT::Get::Assume-Yes "true";'; \
249+
echo 'APT::Install-Recommends "false";'; \
250+
echo 'Acquire::Check-Valid-Until "false";'; \
251+
echo 'Acquire::Retries "5";'; \
252+
) > /etc/apt/apt.conf.d/99snapshot-repos
253+
254+
apt-get update
255+
apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
256+
dpkg --add-architecture loong64
257+
258+
# Add arch-specific repositories for non-amd64 architectures
259+
cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
260+
deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
261+
EOF
262+
263+
apt-get update || true ;# Prevent failure due to missing URLs.
264+
265+
apt-get install -y --no-install-recommends \
266+
build-essential \
267+
gcc-14-loongarch64-linux-gnu \
268+
g++-14-loongarch64-linux-gnu
269+
270+
- name: Build
271+
run: |
272+
cmake -B build -DLLAMA_CURL=OFF \
273+
-DCMAKE_BUILD_TYPE=Release \
274+
-DGGML_OPENMP=OFF \
275+
-DLLAMA_BUILD_EXAMPLES=ON \
276+
-DLLAMA_BUILD_TOOLS=ON \
277+
-DLLAMA_BUILD_TESTS=OFF \
278+
-DCMAKE_SYSTEM_NAME=Linux \
279+
-DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
280+
-DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
281+
-DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
282+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
283+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
284+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
285+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
286+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
287+
288+
cmake --build build --config Release -j $(nproc)
289+
290+
debian-13-loongarch64-vulkan-cross:
291+
runs-on: ubuntu-24.04
292+
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
293+
294+
steps:
295+
- uses: actions/checkout@v4
296+
- name: Setup LoongArch
297+
run: |
298+
rm -f /etc/apt/sources.list.d/*
299+
cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
300+
deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
301+
EOF
302+
( echo 'quiet "true";'; \
303+
echo 'APT::Get::Assume-Yes "true";'; \
304+
echo 'APT::Install-Recommends "false";'; \
305+
echo 'Acquire::Check-Valid-Until "false";'; \
306+
echo 'Acquire::Retries "5";'; \
307+
) > /etc/apt/apt.conf.d/99snapshot-repos
308+
309+
apt-get update
310+
apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
311+
dpkg --add-architecture loong64
312+
313+
# Add arch-specific repositories for non-amd64 architectures
314+
cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
315+
deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
316+
EOF
317+
318+
apt-get update || true ;# Prevent failure due to missing URLs.
319+
320+
apt-get install -y --no-install-recommends \
321+
build-essential \
322+
glslc \
323+
gcc-14-loongarch64-linux-gnu \
324+
g++-14-loongarch64-linux-gnu \
325+
libvulkan-dev:loong64
326+
327+
- name: Build
328+
run: |
329+
cmake -B build -DLLAMA_CURL=OFF \
330+
-DCMAKE_BUILD_TYPE=Release \
331+
-DGGML_VULKAN=ON \
332+
-DGGML_OPENMP=OFF \
333+
-DLLAMA_BUILD_EXAMPLES=ON \
334+
-DLLAMA_BUILD_TOOLS=ON \
335+
-DLLAMA_BUILD_TESTS=OFF \
336+
-DCMAKE_SYSTEM_NAME=Linux \
337+
-DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
338+
-DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
339+
-DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
340+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
341+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
342+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
343+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
344+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
345+
346+
cmake --build build --config Release -j $(nproc)

.github/workflows/build.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ jobs:
306306
id: cmake_test
307307
run: |
308308
cd build
309+
export GGML_VK_VISIBLE_DEVICES=0
309310
# This is using llvmpipe and runs slower than other backends
310311
ctest -L main --verbose --timeout 3600
311312
@@ -687,8 +688,8 @@ jobs:
687688
strategy:
688689
matrix:
689690
include:
690-
- build: 'cpu-x64'
691-
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
691+
- build: 'cpu-x64 (static)'
692+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
692693
- build: 'openblas-x64'
693694
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
694695
- build: 'vulkan-x64'

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,11 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
159159
# ... otherwise assume ggml is added by a parent CMakeLists.txt
160160
endif()
161161

162+
if (MINGW)
163+
# Target Windows 8 for PrefetchVirtualMemory
164+
add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
165+
endif()
166+
162167
#
163168
# build the library
164169
#

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ ifdef LLAMA_SERVER_SSL
367367
endif
368368

369369
ifndef GGML_NO_CPU_AARCH64
370-
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
370+
MK_CPPFLAGS += -DGGML_USE_CPU_REPACK
371371
endif
372372

373373
# warnings
@@ -970,7 +970,7 @@ OBJ_GGML = \
970970
$(DIR_GGML)/src/ggml-threading.o \
971971
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
972972
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
973-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
973+
$(DIR_GGML)/src/ggml-cpu/repack.o \
974974
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
975975
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
976976
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
44

55
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6+
[![Release](https://img.shields.io/github/v/release/ggml-org/llama.cpp)](https://github.com/ggml-org/llama.cpp/releases)
67
[![Server](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml)
78

89
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggml-org/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml)

ci/run.sh

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,20 @@ if [ ! -z ${GG_BUILD_METAL} ]; then
4646
fi
4747

4848
if [ ! -z ${GG_BUILD_CUDA} ]; then
49-
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
49+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"
50+
51+
if command -v nvidia-smi >/dev/null 2>&1; then
52+
CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')
53+
if [[ -n "$CUDA_ARCH" && "$CUDA_ARCH" =~ ^[0-9]+$ ]]; then
54+
CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH}"
55+
else
56+
echo "Warning: Using fallback CUDA architectures"
57+
CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=61;70;75;80;86;89"
58+
fi
59+
else
60+
echo "Error: nvidia-smi not found, cannot build with CUDA"
61+
exit 1
62+
fi
5063
fi
5164

5265
if [ ! -z ${GG_BUILD_SYCL} ]; then

common/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ llama_add_compile_flags()
77
# Build info header
88
#
99

10-
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
11-
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
10+
if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
11+
set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")
1212

1313
# Is git submodule
1414
if(NOT IS_DIRECTORY "${GIT_DIR}")
@@ -18,7 +18,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
1818
if (SLASH_POS EQUAL 0)
1919
set(GIT_DIR "${REAL_GIT_DIR}")
2020
else()
21-
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
21+
set(GIT_DIR "${PROJECT_SOURCE_DIR}/${REAL_GIT_DIR}")
2222
endif()
2323
endif()
2424

@@ -42,7 +42,7 @@ add_custom_command(
4242
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
4343
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
4444
-P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
45-
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
45+
WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
4646
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
4747
VERBATIM
4848
)

common/common.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,7 @@ struct common_init_result common_init_from_params(common_params & params) {
934934
return iparams;
935935
}
936936

937-
if (params.ctx_shift && !llama_kv_self_can_shift(lctx)) {
937+
if (params.ctx_shift && !llama_memory_can_shift(llama_get_memory(lctx))) {
938938
LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__);
939939
params.ctx_shift = false;
940940
}
@@ -1041,7 +1041,7 @@ struct common_init_result common_init_from_params(common_params & params) {
10411041
if (llama_model_has_decoder(model)) {
10421042
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch)));
10431043
}
1044-
llama_kv_self_clear(lctx);
1044+
llama_memory_clear(llama_get_memory(lctx), true);
10451045
llama_synchronize(lctx);
10461046
llama_perf_context_reset(lctx);
10471047
llama_set_warmup(lctx, false);

common/speculative.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ llama_tokens common_speculative_gen_draft(
144144
auto & smpl = spec->smpl;
145145
auto & prompt = spec->prompt;
146146

147+
auto * mem = llama_get_memory(ctx);
148+
147149
int reuse_i = 0;
148150
int reuse_n = 0;
149151

@@ -173,7 +175,7 @@ llama_tokens common_speculative_gen_draft(
173175
result.reserve(params.n_draft);
174176

175177
if (reuse_n == 0) {
176-
llama_kv_self_clear(ctx);
178+
llama_memory_clear(mem, false);
177179

178180
prompt.clear();
179181
} else {
@@ -192,14 +194,14 @@ llama_tokens common_speculative_gen_draft(
192194
}
193195

194196
if (reuse_i > 0) {
195-
llama_kv_self_seq_rm (ctx, 0, 0, reuse_i);
196-
llama_kv_self_seq_add(ctx, 0, reuse_i, -1, -reuse_i);
197+
llama_memory_seq_rm (mem, 0, 0, reuse_i);
198+
llama_memory_seq_add(mem, 0, reuse_i, -1, -reuse_i);
197199

198200
prompt.erase(prompt.begin(), prompt.begin() + reuse_i);
199201
}
200202

201203
if (reuse_n < (int) prompt.size()) {
202-
llama_kv_self_seq_rm (ctx, 0, reuse_n, -1);
204+
llama_memory_seq_rm (mem, 0, reuse_n, -1);
203205

204206
prompt.erase(prompt.begin() + reuse_n, prompt.end());
205207
}

0 commit comments

Comments
 (0)