Commit 7237e1f

Merge branch 'ggerganov:master' into master
2 parents 6ee2a55 + a38b884 commit 7237e1f

115 files changed: 5734 additions & 2010 deletions


.devops/nix/package.nix

Lines changed: 7 additions & 10 deletions
@@ -17,19 +17,18 @@
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
-  clblast,
+  curl,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
-    useOpenCL
     useRocm
     useVulkan
   ] && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
   useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

@@ -56,7 +55,6 @@ let
     ++ lib.optionals useCuda [ "CUDA" ]
     ++ lib.optionals useMetalKit [ "MetalKit" ]
     ++ lib.optionals useMpi [ "MPI" ]
-    ++ lib.optionals useOpenCL [ "OpenCL" ]
     ++ lib.optionals useRocm [ "ROCm" ]
     ++ lib.optionals useVulkan [ "Vulkan" ];

@@ -198,19 +196,19 @@ effectiveStdenv.mkDerivation (
       optionals effectiveStdenv.isDarwin darwinBuildInputs
       ++ optionals useCuda cudaBuildInputs
       ++ optionals useMpi [ mpi ]
-      ++ optionals useOpenCL [ clblast ]
       ++ optionals useRocm rocmBuildInputs
       ++ optionals useBlas [ blas ]
-      ++ optionals useVulkan vulkanBuildInputs;
+      ++ optionals useVulkan vulkanBuildInputs
+      ++ optionals enableCurl [ curl ];

     cmakeFlags =
       [
         (cmakeBool "LLAMA_BUILD_SERVER" true)
         (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
         (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+        (cmakeBool "LLAMA_CURL" enableCurl)
         (cmakeBool "GGML_NATIVE" false)
         (cmakeBool "GGML_BLAS" useBlas)
-        (cmakeBool "GGML_CLBLAST" useOpenCL)
         (cmakeBool "GGML_CUDA" useCuda)
         (cmakeBool "GGML_HIPBLAS" useRocm)
         (cmakeBool "GGML_METAL" useMetalKit)

@@ -254,7 +252,6 @@ effectiveStdenv.mkDerivation (
         useCuda
         useMetalKit
         useMpi
-        useOpenCL
         useRocm
         useVulkan
         ;

@@ -281,7 +278,7 @@ effectiveStdenv.mkDerivation (
       # Configurations we don't want even the CI to evaluate. Results in the
       # "unsupported platform" messages. This is mostly a no-op, because
       # cudaPackages would've refused to evaluate anyway.
-      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+      badPlatforms = optionals useCuda lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
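
Note (not part of the commit): because the new enableCurl argument defaults to true, a plain build of the Nix package now configures with LLAMA_CURL and links against curl. A minimal sketch, assuming the repository flake exposes this derivation as its default output and keeps the usual callPackage-style override mechanism:

    # build with the new defaults: curl enabled, the OpenCL/CLBlast path removed
    nix build .
    # turning curl off would go through an override in Nix code, for example
    #   llama-cpp.override { enableCurl = false; }
    # (the attribute name here is illustrative, not taken from the flake)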

.github/ISSUE_TEMPLATE/config.yml

Lines changed: 0 additions & 2 deletions
@@ -9,5 +9,3 @@ contact_links:
   - name: Want to contribute?
     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
-
-

.github/workflows/build.yml

Lines changed: 4 additions & 3 deletions
@@ -47,7 +47,7 @@ jobs:
           sysctl -a
           mkdir build
           cd build
-          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
+          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test

@@ -105,7 +105,7 @@
           sysctl -a
           # Metal is disabled due to intermittent failures with Github runners not having a GPU:
           # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
-          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON
+          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test

@@ -222,7 +222,7 @@
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
+          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build . --config Release -j $(nproc)

       - name: Test

@@ -799,6 +799,7 @@
           7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
           $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
           cd build
+          $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
           & $sde -future -- ctest -L main -C Release --verbose --timeout 900

       - name: Determine tag name
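
For reference, the updated macOS CI step above boils down to roughly the following when reproduced locally (flags copied from the diff; the parallel job count is machine specific):

    mkdir build
    cd build
    cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
    cmake --build . --config Release -j "$(sysctl -n hw.logicalcpu)"

The new LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR variable is only set in the Windows job that runs ctest under Intel's SDE emulator, presumably so that tests which are slow under emulation can opt out.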

.gitignore

Lines changed: 6 additions & 5 deletions
@@ -98,13 +98,14 @@ examples/server/*.mjs.hpp
 
 # Python
 
-__pycache__
-.venv
-/Pipfile
-dist
-poetry.lock
+/.venv
+__pycache__/
+*/poetry.lock
 poetry.toml
 
+# Nix
+/result
+
 # Test binaries
 /tests/test-backend-ops
 /tests/test-double-float
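
A quick way to sanity-check the new ignore rules (the paths below are only illustrative):

    # prints the matching pattern, source file and line for each path
    git check-ignore -v .venv result gguf-py/poetry.lock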

CMakeLists.txt

Lines changed: 15 additions & 5 deletions
@@ -42,6 +42,10 @@ endif()
 
 option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
 
+if (WIN32)
+    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
+endif()
+
 #
 # option list
 #

@@ -79,14 +83,21 @@ set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
 set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
 set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
 set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
-set(GGML_LLAMAFILE ON)
-set(GGML_CUDA_USE_GRAPHS ON)
+
+# change the default for these ggml options
+if (NOT DEFINED GGML_LLAMAFILE)
+    set(GGML_LLAMAFILE ON)
+endif()
+
+if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
+    set(GGML_CUDA_USE_GRAPHS ON)
+endif()
 
 # transition helpers
 function (llama_option_depr TYPE OLD NEW)
     if (${OLD})
         message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
-        set(${NEW} ON)
+        set(${NEW} ON PARENT_SCOPE)
     endif()
 endfunction()

@@ -96,7 +107,6 @@ llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
 llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
 llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
 llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
-llama_option_depr(WARNING LLAMA_OPENMP GGML_OPENMP)
 llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
 llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)

@@ -146,7 +156,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
         DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)
 
 install(
-    FILES convert-hf-to-gguf.py
+    FILES convert_hf_to_gguf.py
     PERMISSIONS
         OWNER_READ
         OWNER_WRITE
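
Because the ggml defaults above are now guarded by if (NOT DEFINED ...), a value passed on the command line is no longer clobbered; a minimal sketch of overriding one of them at configure time:

    # the user-supplied cache value survives the new default-setting logic
    cmake -B build -DGGML_LLAMAFILE=OFF
    cmake --build build --config Release

Similarly, set(${NEW} ON PARENT_SCOPE) makes llama_option_depr propagate the replacement option to the scope that calls the function, rather than setting it only inside the function body.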

CMakePresets.json

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@
       "cacheVariables": {
         "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
         "CMAKE_CXX_COMPILER": "icx",
+        "CMAKE_C_COMPILER": "cl",
         "GGML_SYCL": "ON",
         "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
       }
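
With both the C and C++ compilers pinned in the SYCL preset, configuring through a preset now picks a consistent toolchain. A sketch (the concrete preset name is an assumption; check CMakePresets.json for the actual list):

    cmake --list-presets
    cmake --preset x64-windows-sycl-release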

Makefile

Lines changed: 8 additions & 0 deletions
@@ -45,6 +45,7 @@ BUILD_TARGETS = \
 TEST_TARGETS = \
     tests/test-autorelease \
     tests/test-backend-ops \
+    tests/test-chat-template \
     tests/test-double-float \
     tests/test-grad0 \
     tests/test-grammar-integration \

@@ -61,6 +62,11 @@ TEST_TARGETS = \
     tests/test-tokenizer-1-bpe \
     tests/test-tokenizer-1-spm
 
+# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
+LEGACY_TARGETS = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+    simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
+    retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm
+
 # Deprecation aliases
 ifdef LLAMA_CUBLAS
 $(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)

@@ -1070,6 +1076,7 @@ clean:
     rm -rvf src/*.o
     rm -rvf tests/*.o
     rm -rvf examples/*.o
+    rm -rvf common/*.o
     rm -rvf *.a
     rm -rvf *.dll
     rm -rvf *.so

@@ -1084,6 +1091,7 @@ clean:
     rm -vrf ggml/src/ggml-cuda/template-instances/*.o
     rm -rvf $(BUILD_TARGETS)
     rm -rvf $(TEST_TARGETS)
+    rm -rvf $(LEGACY_TARGETS)
     find examples pocs -type f -name "*.o" -delete
 
 #
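
One practical effect of the LEGACY_TARGETS list: binaries left over from before the rename in #7809 are now removed by the ordinary clean target as well:

    # also deletes the old, pre-rename binaries still sitting in the build tree
    make clean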

README.md

Lines changed: 6 additions & 0 deletions
@@ -108,6 +108,7 @@ Typically finetunes of the base models below are supported as well.
 - [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon)
 - [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
 - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
+- [X] [BERT](https://github.com/ggerganov/llama.cpp/pull/5423)
 - [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
 - [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft)
 - [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila)

@@ -217,6 +218,11 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 **Tools:**
 
 - [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML
+- [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption
+
+**Infrastructure:**
+
+- [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp
 
 ---
 
ci/run.sh

Lines changed: 1 addition & 1 deletion
@@ -688,7 +688,7 @@ function gg_run_embd_bge_small {
     (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
     (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
 
-    python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
+    python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
 
     model_f16="${path_models}/ggml-model-f16.gguf"
     model_q8_0="${path_models}/ggml-model-q8_0.gguf"
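
The converter script itself was renamed from convert-hf-to-gguf.py to convert_hf_to_gguf.py (underscores instead of hyphens, presumably so it can be imported as a Python module), and call sites such as the one above follow suit. Invoked directly it looks like this (the model paths are placeholders):

    python3 ./convert_hf_to_gguf.py /path/to/hf-model --outfile /path/to/ggml-model-f16.gguf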
