
Commit 2b74648

Merge branch 'master' of github.com:ggerganov/llama.cpp into convert-bf16-fix
2 parents: 675a741 + c8a0090

341 files changed: +44059 / -162712 lines


.devops/full-cuda.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ ARG CUDA_VERSION=11.7.1
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build

 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
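
The .devops changes in this commit are mostly mechanical: every multi-stage FROM ... as build/runtime is rewritten as FROM ... AS build/runtime. The build behaves the same either way; the uppercase keyword simply matches the casing of FROM and avoids the keyword-casing warning that newer BuildKit versions emit. The same one-line change repeats in the ROCm, Vulkan, Intel, plain-CPU and server variants below. A quick local check, with the image tag purely illustrative:

    docker build -f .devops/full-cuda.Dockerfile -t local/llama.cpp:full-cuda .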

.devops/full-rocm.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build

 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878

.devops/full.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04

-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build

 RUN apt-get update && \
     apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

.devops/llama-cli-cuda.Dockerfile
Lines changed: 2 additions & 2 deletions

@@ -6,7 +6,7 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build

 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
@@ -25,7 +25,7 @@ ENV GGML_CUDA=1

 RUN make -j$(nproc) llama-cli

-FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libgomp1

.devops/llama-cli-intel.Dockerfile
Lines changed: 5 additions & 3 deletions

@@ -1,6 +1,6 @@
 ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

-FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

 ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
@@ -14,10 +14,12 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
     echo "GGML_SYCL_F16 is set" && \
     export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
-    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    echo "Building with static libs" && \
+    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
+    ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
     cmake --build build --config Release --target llama-cli

-FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

 COPY --from=build /app/build/bin/llama-cli /llama-cli
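
With -DBUILD_SHARED_LIBS=OFF (echoed as "Building with static libs"), the Intel CLI image now links llama-cli against static ggml/llama libraries, so the single binary copied into the runtime stage no longer needs the project's own shared objects. A minimal local build of this image, with the tag chosen here for illustration only:

    docker build -f .devops/llama-cli-intel.Dockerfile --build-arg GGML_SYCL_F16=ON -t local/llama-cli-intel .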

.devops/llama-cli-rocm.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build

 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878

.devops/llama-cli-vulkan.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=jammy

-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build

 # Install build tools
 RUN apt update && apt install -y git build-essential cmake wget libgomp1

.devops/llama-cli.Dockerfile
Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04

-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build

 RUN apt-get update && \
     apt-get install -y build-essential git
@@ -11,7 +11,7 @@ COPY . .

 RUN make -j$(nproc) llama-cli

-FROM ubuntu:$UBUNTU_VERSION as runtime
+FROM ubuntu:$UBUNTU_VERSION AS runtime

 RUN apt-get update && \
     apt-get install -y libgomp1

.devops/llama-server-cuda.Dockerfile
Lines changed: 2 additions & 2 deletions

@@ -6,7 +6,7 @@ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VER
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

-FROM ${BASE_CUDA_DEV_CONTAINER} as build
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build

 # Unless otherwise specified, we make a fat build.
 ARG CUDA_DOCKER_ARCH=all
@@ -27,7 +27,7 @@ ENV LLAMA_CURL=1

 RUN make -j$(nproc) llama-server

-FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl

.devops/llama-server-intel.Dockerfile
Lines changed: 3 additions & 2 deletions

@@ -1,6 +1,6 @@
 ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

-FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

 ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
@@ -14,10 +14,11 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
     echo "GGML_SYCL_F16 is set" && \
     export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
+    echo "Building with dynamic libs" && \
     cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-server

-FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
+FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev curl
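
Unlike the CLI image, the server image keeps shared libraries (echoed as "Building with dynamic libs") and enables LLAMA_CURL so the server can fetch models over HTTP. A sketch of running the resulting image, assuming the entrypoint is the server binary; the tag, model path and Intel GPU device mapping are illustrative:

    docker run --rm --device /dev/dri -p 8080:8080 -v /path/to/models:/models \
        local/llama-server-intel -m /models/model.gguf --host 0.0.0.0 --port 8080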

.devops/llama-server-rocm.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -6,7 +6,7 @@ ARG ROCM_VERSION=5.6
 # Target the CUDA build image
 ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

-FROM ${BASE_ROCM_DEV_CONTAINER} as build
+FROM ${BASE_ROCM_DEV_CONTAINER} AS build

 # Unless otherwise specified, we make a fat build.
 # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878

.devops/llama-server-vulkan.Dockerfile
Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=jammy

-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build

 # Install build tools
 RUN apt update && apt install -y git build-essential cmake wget

.devops/llama-server.Dockerfile
Lines changed: 2 additions & 2 deletions

@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04

-FROM ubuntu:$UBUNTU_VERSION as build
+FROM ubuntu:$UBUNTU_VERSION AS build

 RUN apt-get update && \
     apt-get install -y build-essential git libcurl4-openssl-dev curl
@@ -13,7 +13,7 @@ ENV LLAMA_CURL=1

 RUN make -j$(nproc) llama-server

-FROM ubuntu:$UBUNTU_VERSION as runtime
+FROM ubuntu:$UBUNTU_VERSION AS runtime

 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1

.devops/nix/apps.nix
Lines changed: 0 additions & 1 deletion

@@ -10,7 +10,6 @@
       "llama-embedding"
       "llama-server"
       "llama-quantize"
-      "llama-train-text-from-scratch"
     ];
     mkApp = name: {
       type = "app";
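
The stale llama-train-text-from-scratch entry is dropped from the flake's app list, presumably because the corresponding example no longer exists in the tree. Assuming flakes are enabled and the remaining names map one-to-one to runnable apps, the surviving entries can still be invoked directly, for example:

    nix run .#llama-server -- -m ./models/model.gguf --port 8080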

.devops/nix/package.nix
Lines changed: 28 additions & 20 deletions

@@ -17,19 +17,19 @@
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
-  clblast,
+  curl,
+  shaderc,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
-    useOpenCL
     useRocm
     useVulkan
   ] && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
   useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

@@ -56,7 +56,6 @@ let
     ++ lib.optionals useCuda [ "CUDA" ]
     ++ lib.optionals useMetalKit [ "MetalKit" ]
     ++ lib.optionals useMpi [ "MPI" ]
-    ++ lib.optionals useOpenCL [ "OpenCL" ]
     ++ lib.optionals useRocm [ "ROCm" ]
     ++ lib.optionals useVulkan [ "Vulkan" ];

@@ -91,6 +90,22 @@ let
       ps.tiktoken
       ps.torchWithoutCuda
       ps.transformers
+
+      # server bench
+      ps.matplotlib
+
+      # server tests
+      ps.openai
+      ps.behave
+      ps.prometheus-client
+
+      # for examples/pydantic-models-to-grammar-examples.py
+      ps.docstring-parser
+      ps.pydantic
+
+      # for scripts/compare-llama-bench.py
+      ps.gitpython
+      ps.tabulate
     ]
   );

@@ -111,16 +126,9 @@ let
     ++ optionals useMetalKit [ MetalKit ];

   cudaBuildInputs = with cudaPackages; [
-    cuda_cccl.dev # <nv/target>
-
-    # A temporary hack for reducing the closure size, remove once cudaPackages
-    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-    cuda_cudart.dev
-    cuda_cudart.lib
-    cuda_cudart.static
-    libcublas.dev
-    libcublas.lib
-    libcublas.static
+    cuda_cudart
+    cuda_cccl # <nv/target>
+    libcublas
   ];

   rocmBuildInputs = with rocmPackages; [
@@ -132,6 +140,7 @@ let
   vulkanBuildInputs = [
     vulkan-headers
     vulkan-loader
+    shaderc
   ];
 in

@@ -198,19 +207,19 @@ effectiveStdenv.mkDerivation (
       optionals effectiveStdenv.isDarwin darwinBuildInputs
       ++ optionals useCuda cudaBuildInputs
       ++ optionals useMpi [ mpi ]
-      ++ optionals useOpenCL [ clblast ]
       ++ optionals useRocm rocmBuildInputs
       ++ optionals useBlas [ blas ]
-      ++ optionals useVulkan vulkanBuildInputs;
+      ++ optionals useVulkan vulkanBuildInputs
+      ++ optionals enableCurl [ curl ];

     cmakeFlags =
       [
         (cmakeBool "LLAMA_BUILD_SERVER" true)
         (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
         (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+        (cmakeBool "LLAMA_CURL" enableCurl)
         (cmakeBool "GGML_NATIVE" false)
         (cmakeBool "GGML_BLAS" useBlas)
-        (cmakeBool "GGML_CLBLAST" useOpenCL)
         (cmakeBool "GGML_CUDA" useCuda)
         (cmakeBool "GGML_HIPBLAS" useRocm)
         (cmakeBool "GGML_METAL" useMetalKit)
@@ -254,7 +263,6 @@ effectiveStdenv.mkDerivation (
         useCuda
         useMetalKit
         useMpi
-        useOpenCL
         useRocm
         useVulkan
         ;
@@ -281,7 +289,7 @@ effectiveStdenv.mkDerivation (
       # Configurations we don't want even the CI to evaluate. Results in the
       # "unsupported platform" messages. This is mostly a no-op, because
       # cudaPackages would've refused to evaluate anyway.
-      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+      badPlatforms = optionals useCuda lib.platforms.darwin;

       # Configurations that are known to result in build failures. Can be
       # overridden by importing Nixpkgs with `allowBroken = true`.
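
The Nix package drops OpenCL/CLBlast support entirely, picks up shaderc for the Vulkan build, collapses the enumerated CUDA split outputs down to cuda_cudart, cuda_cccl and libcublas, and adds a curl dependency behind a new enableCurl flag (default true) that is forwarded to the LLAMA_CURL CMake option. A sketch of toggling that flag from the command line, assuming the flake exposes this derivation as its default package and that .override is available on it:

    nix build --impure --expr \
      '(builtins.getFlake (toString ./.)).packages.${builtins.currentSystem}.default.override { enableCurl = false; }'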

.devops/tools.sh
Lines changed: 1 addition & 5 deletions

@@ -8,13 +8,11 @@ arg1="$1"
 shift

 if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
-    python3 ./convert-hf-to-gguf.py "$@"
+    python3 ./convert_hf_to_gguf.py "$@"
 elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
     ./llama-quantize "$@"
 elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
     ./llama-cli "$@"
-elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
-    ./llama-finetune "$@"
 elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
     echo "Converting PTH to GGML..."
     for i in `ls $1/$2/ggml-model-f16.bin*`; do
@@ -36,8 +34,6 @@ else
     echo " ex: --outtype f16 \"/models/7B/\" "
     echo " --quantize (-q): Optimize with quantization process ggml"
     echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
-    echo " --finetune (-f): Run finetune command to create a lora finetune of the model"
-    echo " See documentation for finetune for command-line parameters"
     echo " --all-in-one (-a): Execute --convert & --quantize"
     echo " ex: \"/models/\" 7B"
     echo " --server (-s): Run a model on the server"
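
The helper script now calls the converter by its snake_case filename, convert_hf_to_gguf.py, and the --finetune entry point is gone, apparently because the llama-finetune binary is no longer built. An illustrative invocation, with the model path assumed rather than taken from the repo:

    # directly, mirroring the --convert branch above
    python3 ./convert_hf_to_gguf.py --outtype f16 /models/7B/
    # or through the helper inside the tools image
    ./tools.sh --convert --outtype f16 /models/7B/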

.github/ISSUE_TEMPLATE/config.yml
Lines changed: 0 additions & 2 deletions

@@ -9,5 +9,3 @@ contact_links:
   - name: Want to contribute?
     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
-
-

.github/labeler.yml
Lines changed: 3 additions & 1 deletion

@@ -16,7 +16,9 @@ SYCL:
     - any-glob-to-any-file:
       - ggml/include/ggml-sycl.h
       - ggml/src/ggml-sycl.cpp
-      - README-sycl.md
+      - ggml/src/ggml-sycl/**
+      - docs/backend/SYCL.md
+      - examples/sycl/**
 Nvidia GPU:
   - changed-files:
     - any-glob-to-any-file:

.github/workflows/build.yml
Lines changed: 5 additions & 2 deletions

@@ -355,8 +355,10 @@ jobs:
       - name: Dependencies
         id: depends
         run: |
-          sudo apt-get update
-          sudo apt-get install build-essential libvulkan-dev
+          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
+          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          sudo apt-get update -y
+          sudo apt-get install -y build-essential vulkan-sdk

       - name: Build
         id: cmake_build
@@ -858,6 +860,7 @@
           mkdir build
           cd build
           cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}

       - name: Determine tag name
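
Two CI adjustments here: the Ubuntu Vulkan job now installs the full Vulkan SDK from LunarG's apt repository rather than just libvulkan-dev, likely because the Vulkan shader compiler shipped with the SDK is needed at build time, and the Windows CUDA release build compiles the ggml target first with one job held back, presumably to stop parallel nvcc invocations from exhausting the runner. A rough local equivalent of the added build step, with -j values left to the machine at hand:

    cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
    cmake --build . --config Release -t ggml
    cmake --build . --config Release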
