From d3e73df66af43ad51706e78c5ad820767d677405 Mon Sep 17 00:00:00 2001
From: Someone Serge
Date: Tue, 26 Dec 2023 03:09:04 +0000
Subject: [PATCH 01/23] flake.lock: update to hotfix CUDA::cuda_driver

Required to support https://github.com/ggerganov/llama.cpp/pull/4606
---
 flake.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flake.lock b/flake.lock
index 0455f65617a2d..0b9c9768b9d42 100644
--- a/flake.lock
+++ b/flake.lock
@@ -20,11 +20,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1698318101,
-        "narHash": "sha256-gUihHt3yPD7bVqg+k/UVHgngyaJ3DMEBchbymBMvK1E=",
+        "lastModified": 1703559957,
+        "narHash": "sha256-x9PUuMEPGUOMB51zNxrDr2QoHbYWlCS2xhFedm9MC5Q=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "63678e9f3d3afecfeafa0acead6239cdb447574c",
+        "rev": "75dd68c36f458c6593c5bbb48abfd3e59bfed380",
         "type": "github"
       },
       "original": {

From 8364cf4d0b4461f7ab8d6d3319688f2ab5b2ac32 Mon Sep 17 00:00:00 2001
From: Philip Taron
Date: Fri, 22 Dec 2023 12:33:09 -0800
Subject: [PATCH 02/23] flake.nix: rewrite

1. Split into separate files per output.

2. Added overlays, so that this flake can be integrated into others.
   The names in the overlay are `llama-cpp`, `llama-cpp-opencl`,
   `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the
   broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs).

3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/)
   rather than `with pkgs;` so that there's dependency injection rather
   than dependency lookup.

4. Add a description and meta information for each package.
   The description includes a bit about what's used to accelerate each one.

5. Use specific CUDA packages instead of cudatoolkit on the advice of SomeoneSerge.

6. Format with `serokell/nixfmt` for a consistent style.

7. Update `flake.lock` with the latest goods.
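As a usage sketch (illustrative, not part of this patch): a downstream flake
might pick up the overlay like so. The input name `llama-cpp` and the
`x86_64-linux` system below are assumptions made for the example; the
`overlays.default` output and the `llama-cpp` attribute are what this change
actually defines.

```nix
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    llama-cpp.url = "github:ggerganov/llama.cpp";
  };

  outputs = { self, nixpkgs, llama-cpp }: {
    packages.x86_64-linux.default =
      let
        pkgs = import nixpkgs {
          system = "x86_64-linux";
          # The overlay injects the `llama-cpp` attribute into the package set.
          overlays = [ llama-cpp.overlays.default ];
        };
      in
      pkgs.llama-cpp;
  };
}
```

Because the package is instantiated with `callPackage`, consumers can also
`.override` individual dependencies of `pkgs.llama-cpp` without editing this
repository.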
--- .devops/nix/apps.nix | 14 +++ .devops/nix/devshells.nix | 10 ++ .devops/nix/overlay.nix | 17 +++ .devops/nix/package.nix | 182 +++++++++++++++++++++++++++++++ flake.lock | 34 ------ flake.nix | 220 +++++++++++++++----------------------- 6 files changed, 310 insertions(+), 167 deletions(-) create mode 100644 .devops/nix/apps.nix create mode 100644 .devops/nix/devshells.nix create mode 100644 .devops/nix/overlay.nix create mode 100644 .devops/nix/package.nix diff --git a/.devops/nix/apps.nix b/.devops/nix/apps.nix new file mode 100644 index 0000000000000..d9b6a1e000628 --- /dev/null +++ b/.devops/nix/apps.nix @@ -0,0 +1,14 @@ +{ package, binaries }: + +let + default = builtins.elemAt binaries 0; + mkApp = name: { + ${name} = { + type = "app"; + program = "${package}/bin/${name}"; + }; + }; + result = builtins.foldl' (acc: name: (mkApp name) // acc) { } binaries; +in + +result // { default = result.${default}; } diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix new file mode 100644 index 0000000000000..f8d541f3068a5 --- /dev/null +++ b/.devops/nix/devshells.nix @@ -0,0 +1,10 @@ +{ concatMapAttrs, packages }: + +concatMapAttrs + (name: package: { + ${name} = package.passthru.shell.overrideAttrs (prevAttrs: { inputsFrom = [ package ]; }); + ${name + "-extra"} = package.passthru.shell-extra.overrideAttrs ( + prevAttrs: { inputsFrom = [ package ]; } + ); + }) + packages diff --git a/.devops/nix/overlay.nix b/.devops/nix/overlay.nix new file mode 100644 index 0000000000000..e5fede7740641 --- /dev/null +++ b/.devops/nix/overlay.nix @@ -0,0 +1,17 @@ +final: prev: + +let + inherit (final.stdenv) isAarch64 isDarwin; + + darwinSpecific = + if isAarch64 then + { inherit (final.darwin.apple_sdk_11_0.frameworks) Accelerate MetalKit; } + else + { inherit (final.darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo; }; + + osSpecific = if isDarwin then darwinSpecific else { }; +in + +{ + llama-cpp = final.callPackage ./package.nix osSpecific; +} diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix new file mode 100644 index 0000000000000..460a32e47b1f0 --- /dev/null +++ b/.devops/nix/package.nix @@ -0,0 +1,182 @@ +{ + lib, + config, + stdenv, + mkShell, + cmake, + ninja, + pkg-config, + git, + python3, + mpi, + openblas, # TODO: Use the generic `blas` so users could switch betwen alternative implementations + cudaPackages, + rocmPackages, + clblast, + Accelerate ? null, + MetalKit ? null, + CoreVideo ? null, + CoreGraphics ? null, + useOpenCL ? false, + useCuda ? config.cudaSupport, + useRocm ? config.rocmSupport, +}@inputs: + +let + inherit (lib) + cmakeBool + cmakeFeature + optionals + versionOlder + ; + isDefault = !useOpenCL && !useCuda && !useRocm; + + # It's necessary to consistently use backendStdenv when building with CUDA support, + # otherwise we get libstdc++ errors downstream. + stdenv = throw "Use effectiveStdenv instead"; + effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv; + + # Give a little description difference between the flavors. + descriptionSuffix = + if useOpenCL then + " (OpenCL accelerated)" + else if useCuda then + " (CUDA accelerated)" + else if useRocm then + " (ROCm accelerated)" + else if (MetalKit != null) then + " (MetalKit accelerated)" + else + ""; + + # TODO: package the Python in this repository in a Nix-like way. + # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo + # is PEP 517-compatible, and ensure the correct .dist-info is generated. 
+ # https://peps.python.org/pep-0517/ + llama-python = python3.withPackages ( + ps: [ + ps.numpy + ps.sentencepiece + ] + ); + + # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime + llama-python-extra = python3.withPackages ( + ps: [ + ps.numpy + ps.sentencepiece + ps.torchWithoutCuda + ps.transformers + ] + ); + + # See ./overlay.nix for where these dependencies are passed in. + defaultBuildInputs = builtins.filter (p: p != null) [ + Accelerate + MetalKit + CoreVideo + CoreGraphics + ]; + + cudaBuildInputs = with cudaPackages; [ + cuda_cccl.dev # + cuda_cudart + libcublas + ]; + + rocmBuildInputs = with rocmPackages; [ + clr + hipblas + rocblas + ]; +in + +effectiveStdenv.mkDerivation { + name = "llama.cpp"; + src = ../../.; + meta = { + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + mainProgram = "llama"; + }; + + postPatch = '' + substituteInPlace ./ggml-metal.m \ + --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" + + # TODO: Package up each Python script or service appropriately. + # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, + # we could make those *.py into setuptools' entrypoints + substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" + ''; + + nativeBuildInputs = [ + cmake + ninja + pkg-config + git + ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ]; + + buildInputs = + [ mpi ] + ++ optionals useOpenCL [ clblast ] + ++ optionals useCuda cudaBuildInputs + ++ optionals useRocm rocmBuildInputs + ++ optionals isDefault defaultBuildInputs; + + cmakeFlags = + [ + (cmakeBool "LLAMA_NATIVE" true) + (cmakeBool "LLAMA_BUILD_SERVER" true) + (cmakeBool "BUILD_SHARED_LIBS" true) + (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + ] + ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] + ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] + ++ optionals useRocm [ + (cmakeBool "LLAMA_HIPBLAS" true) + (cmakeFeature "CMAKE_C_COMPILER" "hipcc") + (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + + # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM + # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt + # and select the line that matches the current nixpkgs version of rocBLAS. + # Should likely use `rocmPackages.clr.gpuTargets`. + "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" + ] + ++ optionals isDefault ( + if (MetalKit != null) then + [ + "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" + "-DLLAMA_METAL=ON" + ] + else + [ + "-DLLAMA_BLAS=ON" + "-DLLAMA_BLAS_VENDOR=OpenBLAS" + ] + ); + + # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, + # if they haven't been added yet. + postInstall = '' + mv $out/bin/main $out/bin/llama + mv $out/bin/server $out/bin/llama-server + mkdir -p $out/include + cp $src/llama.h $out/include/ + ''; + + # Define the shells here, but don't add in the inputsFrom to avoid recursion. 
+ passthru = { + shell = mkShell { + name = "default${descriptionSuffix}"; + description = "contains numpy and sentencepiece"; + buildInputs = [ llama-python ]; + }; + + shell-extra = mkShell { + name = "extra${descriptionSuffix}"; + description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; + buildInputs = [ llama-python-extra ]; + }; + }; +} diff --git a/flake.lock b/flake.lock index 0b9c9768b9d42..656792f21cbf9 100644 --- a/flake.lock +++ b/flake.lock @@ -1,23 +1,5 @@ { "nodes": { - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1694529238, - "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "nixpkgs": { "locked": { "lastModified": 1703559957, @@ -36,24 +18,8 @@ }, "root": { "inputs": { - "flake-utils": "flake-utils", "nixpkgs": "nixpkgs" } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 4cf28d5c11c0f..dcf8e1d9defa0 100644 --- a/flake.nix +++ b/flake.nix @@ -1,139 +1,93 @@ { inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - flake-utils.url = "github:numtide/flake-utils"; }; - outputs = { self, nixpkgs, flake-utils }: - flake-utils.lib.eachDefaultSystem (system: - let - name = "llama.cpp"; - src = ./.; - meta.mainProgram = "llama"; - inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin; - buildInputs = with pkgs; [ openmpi ]; - osSpecific = with pkgs; buildInputs ++ ( - if isAarch64 && isDarwin then - with pkgs.darwin.apple_sdk_11_0.frameworks; [ - Accelerate - MetalKit - ] - else if isAarch32 && isDarwin then - with pkgs.darwin.apple_sdk.frameworks; [ - Accelerate - CoreGraphics - CoreVideo - ] - else if isDarwin then - with pkgs.darwin.apple_sdk.frameworks; [ - Accelerate - CoreGraphics - CoreVideo - ] - else - with pkgs; [ openblas ] - ); - pkgs = import nixpkgs { inherit system; }; - nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ]; - cudatoolkit_joined = with pkgs; symlinkJoin { - # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit - # see https://github.com/NixOS/nixpkgs/issues/224291 - # copied from jaxlib - name = "${cudaPackages.cudatoolkit.name}-merged"; - paths = [ - cudaPackages.cudatoolkit.lib - cudaPackages.cudatoolkit.out - ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [ - # for some reason some of the required libs are in the targets/x86_64-linux - # directory; not sure why but this works around it - "${cudaPackages.cudatoolkit}/targets/${system}" + + outputs = + { self, nixpkgs }: + + let + systems = [ + "aarch64-darwin" + "aarch64-linux" + "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) + "x86_64-linux" + ]; + eachSystem = f: nixpkgs.lib.genAttrs systems (system: f system); + in + + { + # These define the various ways to build the llama.cpp project. + # Integrate them into your flake.nix configuration by adding this overlay to nixpkgs.overlays. 
+ overlays.default = import ./.devops/nix/overlay.nix; + + # These use the package definition from `./.devops/nix/package.nix`. + # There's one per backend that llama-cpp uses. Add more as needed! + packages = eachSystem ( + system: + let + defaultConfig = { + inherit system; + overlays = [ self.overlays.default ]; + }; + pkgs = import nixpkgs defaultConfig; + + # Let's not make a big deal about getting the CUDA bits. + cudaConfig = defaultConfig // { + config.cudaSupport = true; + config.allowUnfreePredicate = + p: + builtins.all + ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) + (p.meta.licenses or [ p.meta.license ]); + }; + pkgsCuda = import nixpkgs cudaConfig; + + # Let's make sure to turn on ROCm support across the whole package ecosystem. + rocmConfig = defaultConfig // { + config.rocmSupport = true; + }; + pkgsRocm = import nixpkgs rocmConfig; + in + { + default = pkgs.llama-cpp; + opencl = pkgs.llama-cpp.override { useOpenCL = true; }; + cuda = pkgsCuda.llama-cpp; + rocm = pkgsRocm.llama-cpp; + } + ); + + # These use the definition of llama-cpp from `./.devops/nix/package.nix` + # and expose various binaries as apps with `nix run .#app-name`. + # Note that none of these apps use anything other than the default backend. + apps = eachSystem ( + system: + import ./.devops/nix/apps.nix { + package = self.packages.${system}.default; + binaries = [ + "llama" + "llama-embedding" + "llama-server" + "quantize" + "train-text-from-scratch" ]; - }; - llama-python = - pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]); - # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime - llama-python-extra = - pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]); - postPatch = '' - substituteInPlace ./ggml-metal.m \ - --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python' - ''; - postInstall = '' - mv $out/bin/main $out/bin/llama - mv $out/bin/server $out/bin/llama-server - mkdir -p $out/include - cp ${src}/llama.h $out/include/ - ''; - cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ]; - in - { - packages.default = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = osSpecific; - cmakeFlags = cmakeFlags - ++ (if isAarch64 && isDarwin then [ - "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" - "-DLLAMA_METAL=ON" - ] else [ - "-DLLAMA_BLAS=ON" - "-DLLAMA_BLAS_VENDOR=OpenBLAS" - ]); - }; - packages.opencl = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs; buildInputs ++ [ clblast ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_CLBLAST=ON" - ]; - }; - packages.cuda = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_CUBLAS=ON" - ]; - }; - packages.rocm = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_HIPBLAS=1" - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - # Build all targets supported by rocBLAS. 
When updating search for TARGET_LIST_ROCM - # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt - # and select the line that matches the current nixpkgs version of rocBLAS. - "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" - ]; - }; - apps.llama-server = { - type = "app"; - program = "${self.packages.${system}.default}/bin/llama-server"; - }; - apps.llama-embedding = { - type = "app"; - program = "${self.packages.${system}.default}/bin/embedding"; - }; - apps.llama = { - type = "app"; - program = "${self.packages.${system}.default}/bin/llama"; - }; - apps.quantize = { - type = "app"; - program = "${self.packages.${system}.default}/bin/quantize"; - }; - apps.train-text-from-scratch = { - type = "app"; - program = "${self.packages.${system}.default}/bin/train-text-from-scratch"; - }; - apps.default = self.apps.${system}.llama; - devShells.default = pkgs.mkShell { - buildInputs = [ llama-python ]; - packages = nativeBuildInputs ++ osSpecific; - }; - devShells.extra = pkgs.mkShell { - buildInputs = [ llama-python-extra ]; - packages = nativeBuildInputs ++ osSpecific; - }; - }); + } + ); + + # These expose a build environment for either a "default" or an "extra" set of dependencies. + devShells = eachSystem ( + system: + import ./.devops/nix/devshells.nix { + concatMapAttrs = nixpkgs.lib.concatMapAttrs; + packages = self.packages.${system}; + } + ); + }; } From 0607e24ec22caa316648015f6831c41619ffe9a0 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Sun, 24 Dec 2023 18:15:25 +0000 Subject: [PATCH 03/23] flake.nix: use finalPackage instead of passing it manually --- .devops/nix/devshells.nix | 6 ++---- .devops/nix/package.nix | 6 ++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix index f8d541f3068a5..afaaa2644059b 100644 --- a/.devops/nix/devshells.nix +++ b/.devops/nix/devshells.nix @@ -2,9 +2,7 @@ concatMapAttrs (name: package: { - ${name} = package.passthru.shell.overrideAttrs (prevAttrs: { inputsFrom = [ package ]; }); - ${name + "-extra"} = package.passthru.shell-extra.overrideAttrs ( - prevAttrs: { inputsFrom = [ package ]; } - ); + ${name} = package.passthru.shell; + ${name + "-extra"} = package.passthru.shell-extra; }) packages diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 460a32e47b1f0..bd2dbf4b2c4bd 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -91,7 +91,7 @@ let ]; in -effectiveStdenv.mkDerivation { +effectiveStdenv.mkDerivation (finalAttrs: { name = "llama.cpp"; src = ../../.; meta = { @@ -171,12 +171,14 @@ effectiveStdenv.mkDerivation { name = "default${descriptionSuffix}"; description = "contains numpy and sentencepiece"; buildInputs = [ llama-python ]; + inputsFrom = [ finalAttrs.finalPackage ]; }; shell-extra = mkShell { name = "extra${descriptionSuffix}"; description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; buildInputs = [ llama-python-extra ]; + inputsFrom = [ finalAttrs.finalPackage ]; }; }; -} +}) From eab1c125b9871553ccd2d0aea49f143f9051f581 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Sun, 24 Dec 2023 19:35:32 +0000 Subject: [PATCH 04/23] nix: unclutter darwin support --- .devops/nix/overlay.nix | 14 +---------- .devops/nix/package.nix | 52 ++++++++++++++++++++--------------------- 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/.devops/nix/overlay.nix 
b/.devops/nix/overlay.nix index e5fede7740641..c7baec8434fa4 100644 --- a/.devops/nix/overlay.nix +++ b/.devops/nix/overlay.nix @@ -1,17 +1,5 @@ final: prev: -let - inherit (final.stdenv) isAarch64 isDarwin; - - darwinSpecific = - if isAarch64 then - { inherit (final.darwin.apple_sdk_11_0.frameworks) Accelerate MetalKit; } - else - { inherit (final.darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo; }; - - osSpecific = if isDarwin then darwinSpecific else { }; -in - { - llama-cpp = final.callPackage ./package.nix osSpecific; + llama-cpp = final.callPackage ./package.nix { }; } diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index bd2dbf4b2c4bd..e286fda191b66 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -11,14 +11,18 @@ mpi, openblas, # TODO: Use the generic `blas` so users could switch betwen alternative implementations cudaPackages, + darwin, rocmPackages, clblast, - Accelerate ? null, - MetalKit ? null, - CoreVideo ? null, - CoreGraphics ? null, - useOpenCL ? false, + useBlas ? builtins.all (x: !x) [ + useCuda + useMetalKit + useOpenCL + useRocm + ], useCuda ? config.cudaSupport, + useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, + useOpenCL ? false, useRocm ? config.rocmSupport, }@inputs: @@ -29,7 +33,6 @@ let optionals versionOlder ; - isDefault = !useOpenCL && !useCuda && !useRocm; # It's necessary to consistently use backendStdenv when building with CUDA support, # otherwise we get libstdc++ errors downstream. @@ -44,7 +47,7 @@ let " (CUDA accelerated)" else if useRocm then " (ROCm accelerated)" - else if (MetalKit != null) then + else if useMetalKit then " (MetalKit accelerated)" else ""; @@ -70,13 +73,16 @@ let ] ); - # See ./overlay.nix for where these dependencies are passed in. - defaultBuildInputs = builtins.filter (p: p != null) [ - Accelerate - MetalKit - CoreVideo - CoreGraphics - ]; + # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64 + # separately + darwinBuildInputs = + with darwin.apple_sdk.frameworks; + [ Accelerate ] + ++ optionals useMetalKit [ MetalKit ] + ++ optionals (!useMetalKit) [ + CoreVideo + CoreGraphics + ]; cudaBuildInputs = with cudaPackages; [ cuda_cccl.dev # @@ -121,7 +127,7 @@ effectiveStdenv.mkDerivation (finalAttrs: { ++ optionals useOpenCL [ clblast ] ++ optionals useCuda cudaBuildInputs ++ optionals useRocm rocmBuildInputs - ++ optionals isDefault defaultBuildInputs; + ++ optionals effectiveStdenv.isDarwin darwinBuildInputs; cmakeFlags = [ @@ -129,6 +135,8 @@ effectiveStdenv.mkDerivation (finalAttrs: { (cmakeBool "LLAMA_BUILD_SERVER" true) (cmakeBool "BUILD_SHARED_LIBS" true) (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + (cmakeBool "LLAMA_METAL" useMetalKit) + (cmakeBool "LLAMA_BLAS" useBlas) ] ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] @@ -143,18 +151,8 @@ effectiveStdenv.mkDerivation (finalAttrs: { # Should likely use `rocmPackages.clr.gpuTargets`. 
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" ] - ++ optionals isDefault ( - if (MetalKit != null) then - [ - "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" - "-DLLAMA_METAL=ON" - ] - else - [ - "-DLLAMA_BLAS=ON" - "-DLLAMA_BLAS_VENDOR=OpenBLAS" - ] - ); + ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ] + ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ]; # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, # if they haven't been added yet. From 02599417918eaa5a9ee98d7408f372dbaf25ce2f Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Sun, 24 Dec 2023 19:36:30 +0000 Subject: [PATCH 05/23] nix: pass most darwin frameworks unconditionally ...for simplicity --- .devops/nix/package.nix | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index e286fda191b66..1d401a9ee4ce9 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -77,12 +77,12 @@ let # separately darwinBuildInputs = with darwin.apple_sdk.frameworks; - [ Accelerate ] - ++ optionals useMetalKit [ MetalKit ] - ++ optionals (!useMetalKit) [ + [ + Accelerate CoreVideo CoreGraphics - ]; + ] + ++ optionals useMetalKit [ MetalKit ]; cudaBuildInputs = with cudaPackages; [ cuda_cccl.dev # From 0fa62c1ab93500e474778a3f2d98f30991d63837 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 16:23:56 +0000 Subject: [PATCH 06/23] *.nix: nixfmt nix shell github:piegamesde/nixfmt/rfc101-style --command \ nixfmt flake.nix .devops/nix/*.nix --- .devops/nix/package.nix | 162 ++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 80 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 1d401a9ee4ce9..5b88cf079f605 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -97,86 +97,88 @@ let ]; in -effectiveStdenv.mkDerivation (finalAttrs: { - name = "llama.cpp"; - src = ../../.; - meta = { - description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - mainProgram = "llama"; - }; - - postPatch = '' - substituteInPlace ./ggml-metal.m \ - --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - - # TODO: Package up each Python script or service appropriately. 
- # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, - # we could make those *.py into setuptools' entrypoints - substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" - ''; - - nativeBuildInputs = [ - cmake - ninja - pkg-config - git - ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ]; - - buildInputs = - [ mpi ] - ++ optionals useOpenCL [ clblast ] - ++ optionals useCuda cudaBuildInputs - ++ optionals useRocm rocmBuildInputs - ++ optionals effectiveStdenv.isDarwin darwinBuildInputs; - - cmakeFlags = - [ - (cmakeBool "LLAMA_NATIVE" true) - (cmakeBool "LLAMA_BUILD_SERVER" true) - (cmakeBool "BUILD_SHARED_LIBS" true) - (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) - (cmakeBool "LLAMA_METAL" useMetalKit) - (cmakeBool "LLAMA_BLAS" useBlas) - ] - ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] - ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] - ++ optionals useRocm [ - (cmakeBool "LLAMA_HIPBLAS" true) - (cmakeFeature "CMAKE_C_COMPILER" "hipcc") - (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") - - # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM - # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt - # and select the line that matches the current nixpkgs version of rocBLAS. - # Should likely use `rocmPackages.clr.gpuTargets`. - "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" - ] - ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ] - ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ]; - - # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, - # if they haven't been added yet. - postInstall = '' - mv $out/bin/main $out/bin/llama - mv $out/bin/server $out/bin/llama-server - mkdir -p $out/include - cp $src/llama.h $out/include/ - ''; - - # Define the shells here, but don't add in the inputsFrom to avoid recursion. - passthru = { - shell = mkShell { - name = "default${descriptionSuffix}"; - description = "contains numpy and sentencepiece"; - buildInputs = [ llama-python ]; - inputsFrom = [ finalAttrs.finalPackage ]; +effectiveStdenv.mkDerivation ( + finalAttrs: { + name = "llama.cpp"; + src = ../../.; + meta = { + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + mainProgram = "llama"; }; - shell-extra = mkShell { - name = "extra${descriptionSuffix}"; - description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; - buildInputs = [ llama-python-extra ]; - inputsFrom = [ finalAttrs.finalPackage ]; + postPatch = '' + substituteInPlace ./ggml-metal.m \ + --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" + + # TODO: Package up each Python script or service appropriately. 
+ # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, + # we could make those *.py into setuptools' entrypoints + substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" + ''; + + nativeBuildInputs = [ + cmake + ninja + pkg-config + git + ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ]; + + buildInputs = + [ mpi ] + ++ optionals useOpenCL [ clblast ] + ++ optionals useCuda cudaBuildInputs + ++ optionals useRocm rocmBuildInputs + ++ optionals effectiveStdenv.isDarwin darwinBuildInputs; + + cmakeFlags = + [ + (cmakeBool "LLAMA_NATIVE" true) + (cmakeBool "LLAMA_BUILD_SERVER" true) + (cmakeBool "BUILD_SHARED_LIBS" true) + (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + (cmakeBool "LLAMA_METAL" useMetalKit) + (cmakeBool "LLAMA_BLAS" useBlas) + ] + ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] + ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] + ++ optionals useRocm [ + (cmakeBool "LLAMA_HIPBLAS" true) + (cmakeFeature "CMAKE_C_COMPILER" "hipcc") + (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + + # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM + # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt + # and select the line that matches the current nixpkgs version of rocBLAS. + # Should likely use `rocmPackages.clr.gpuTargets`. + "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" + ] + ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ] + ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ]; + + # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, + # if they haven't been added yet. + postInstall = '' + mv $out/bin/main $out/bin/llama + mv $out/bin/server $out/bin/llama-server + mkdir -p $out/include + cp $src/llama.h $out/include/ + ''; + + # Define the shells here, but don't add in the inputsFrom to avoid recursion. + passthru = { + shell = mkShell { + name = "default${descriptionSuffix}"; + description = "contains numpy and sentencepiece"; + buildInputs = [ llama-python ]; + inputsFrom = [ finalAttrs.finalPackage ]; + }; + + shell-extra = mkShell { + name = "extra${descriptionSuffix}"; + description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; + buildInputs = [ llama-python-extra ]; + inputsFrom = [ finalAttrs.finalPackage ]; + }; }; - }; -}) + } +) From 69c56bc0f43d76495c910bcae0858f010ec63256 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:29:02 +0000 Subject: [PATCH 07/23] flake.nix: add maintainers --- .devops/nix/package.nix | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 5b88cf079f605..12b8f66451f47 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -104,6 +104,16 @@ effectiveStdenv.mkDerivation ( meta = { description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; mainProgram = "llama"; + + + # These people might respond if you ping them in case of Nix-specific + # regressions or for reviewing Nix-specific PRs. + + # Note that lib.maintainers is defined in Nixpkgs. 
+ maintainers = with lib.maintainers; [ + philiptaron + SomeoneSerge + ]; }; postPatch = '' From a07407c98defd7a00fdac62338c90ebb0d90b61a Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:29:50 +0000 Subject: [PATCH 08/23] nix: move meta down to follow Nixpkgs style more closely --- .devops/nix/package.nix | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 12b8f66451f47..471c46b2a7235 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -101,20 +101,6 @@ effectiveStdenv.mkDerivation ( finalAttrs: { name = "llama.cpp"; src = ../../.; - meta = { - description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - mainProgram = "llama"; - - - # These people might respond if you ping them in case of Nix-specific - # regressions or for reviewing Nix-specific PRs. - - # Note that lib.maintainers is defined in Nixpkgs. - maintainers = with lib.maintainers; [ - philiptaron - SomeoneSerge - ]; - }; postPatch = '' substituteInPlace ./ggml-metal.m \ @@ -190,5 +176,20 @@ effectiveStdenv.mkDerivation ( inputsFrom = [ finalAttrs.finalPackage ]; }; }; + + meta = { + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + mainProgram = "llama"; + + + # These people might respond if you ping them in case of Nix-specific + # regressions or for reviewing Nix-specific PRs. + + # Note that lib.maintainers is defined in Nixpkgs. + maintainers = with lib.maintainers; [ + philiptaron + SomeoneSerge + ]; + }; } ) From 04bc417466d72c302831e90bc5c13257890b3a31 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:32:44 +0000 Subject: [PATCH 09/23] nix: add missing meta attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nix: clarify the interpretation of meta.maintainers nix: clarify the meaning of "broken" and "badPlatforms" nix: passthru: expose the use* flags for inspection E.g.: ``` ❯ nix eval .#cuda.useCuda true ``` --- .devops/nix/package.nix | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 471c46b2a7235..c6d03b4a480e7 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -162,6 +162,14 @@ effectiveStdenv.mkDerivation ( # Define the shells here, but don't add in the inputsFrom to avoid recursion. passthru = { + inherit + useBlas + useCuda + useMetalKit + useOpenCL + useRocm + ; + shell = mkShell { name = "default${descriptionSuffix}"; description = "contains numpy and sentencepiece"; @@ -178,18 +186,39 @@ effectiveStdenv.mkDerivation ( }; meta = { + # Configurations we don't want even the CI to evaluate. Results in the + # "unsupported platform" messages. This is mostly a no-op, because + # cudaPackages would've refused to evaluate anyway. + badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin; + + # Configurations that are known to result in build failures. Can be + # overridden by importing Nixpkgs with `allowBroken = true`. 
+ broken = (useMetalKit && !effectiveStdenv.isDarwin); + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - mainProgram = "llama"; + homepage = "https://github.com/ggerganov/llama.cpp/"; + license = lib.licenses.mit; + # Accommodates `nix run` and `lib.getExe` + mainProgram = "llama"; - # These people might respond if you ping them in case of Nix-specific - # regressions or for reviewing Nix-specific PRs. + # These people might respond, on the best effort basis, if you ping them + # in case of Nix-specific regressions or for reviewing Nix-specific PRs. + # Consider adding yourself to this list if you want to ensure this flake + # stays maintained and you're willing to invest your time. Do not add + # other people without their consent. Consider removing people after + # they've been unreachable for long periods of time. - # Note that lib.maintainers is defined in Nixpkgs. + # Note that lib.maintainers is defined in Nixpkgs, but you may just add + # an attrset following the same format as in + # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix maintainers = with lib.maintainers; [ - philiptaron - SomeoneSerge + philiptaron + SomeoneSerge ]; + + # Extend `badPlatforms` instead + platforms = lib.platforms.all; }; } ) From d08690af65af266414512d308d88fa1715f75821 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 15:27:58 +0000 Subject: [PATCH 10/23] flake.nix: avoid re-evaluating nixpkgs too many times --- .devops/nix/overlay.nix | 5 ---- .devops/nix/scope.nix | 3 +++ flake.nix | 52 +++++++++++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 22 deletions(-) delete mode 100644 .devops/nix/overlay.nix create mode 100644 .devops/nix/scope.nix diff --git a/.devops/nix/overlay.nix b/.devops/nix/overlay.nix deleted file mode 100644 index c7baec8434fa4..0000000000000 --- a/.devops/nix/overlay.nix +++ /dev/null @@ -1,5 +0,0 @@ -final: prev: - -{ - llama-cpp = final.callPackage ./package.nix { }; -} diff --git a/.devops/nix/scope.nix b/.devops/nix/scope.nix new file mode 100644 index 0000000000000..78e6a126d3b00 --- /dev/null +++ b/.devops/nix/scope.nix @@ -0,0 +1,3 @@ +{ lib, newScope }: + +lib.makeScope newScope (self: { llama-cpp = self.callPackage ./package.nix { }; }) diff --git a/flake.nix b/flake.nix index dcf8e1d9defa0..f837f47cf386c 100644 --- a/flake.nix +++ b/flake.nix @@ -17,23 +17,42 @@ in { - # These define the various ways to build the llama.cpp project. - # Integrate them into your flake.nix configuration by adding this overlay to nixpkgs.overlays. - overlays.default = import ./.devops/nix/overlay.nix; + # An overlay can be used to have a more granular control over llama-cpp's + # dependencies and configuration, than that offered by the `.override` + # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays. + # + # E.g. in a flake: + # ``` + # { nixpkgs, llama-cpp, ... }: + # let pkgs = import nixpkgs { + # overlays = [ (llama-cpp.overlays.default) ]; + # system = "aarch64-linux"; + # config.allowUnfree = true; + # config.cudaSupport = true; + # config.cudaCapabilities = [ "7.2" ]; + # config.cudaEnableForwardCompat = false; + # }; in { + # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; + # } + # ``` + # + # Cf. 
https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format + overlays.default = (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); # These use the package definition from `./.devops/nix/package.nix`. # There's one per backend that llama-cpp uses. Add more as needed! packages = eachSystem ( system: let - defaultConfig = { + # Avoid re-evaluation for the nixpkgs instance, + # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs + pkgs = nixpkgs.legacyPackages.${system}; + + # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, + # and ucx are built with CUDA support) + pkgsCuda = import nixpkgs { inherit system; - overlays = [ self.overlays.default ]; - }; - pkgs = import nixpkgs defaultConfig; - # Let's not make a big deal about getting the CUDA bits. - cudaConfig = defaultConfig // { config.cudaSupport = true; config.allowUnfreePredicate = p: @@ -48,19 +67,18 @@ ) (p.meta.licenses or [ p.meta.license ]); }; - pkgsCuda = import nixpkgs cudaConfig; - # Let's make sure to turn on ROCm support across the whole package ecosystem. - rocmConfig = defaultConfig // { + # Ensure dependencies use ROCm consistently + pkgsRocm = import nixpkgs { + inherit system; config.rocmSupport = true; }; - pkgsRocm = import nixpkgs rocmConfig; in { - default = pkgs.llama-cpp; - opencl = pkgs.llama-cpp.override { useOpenCL = true; }; - cuda = pkgsCuda.llama-cpp; - rocm = pkgsRocm.llama-cpp; + default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; + opencl = self.packages.${system}.default.override { useOpenCL = true; }; + cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; + rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; } ); From a28c9acca3a474a15f2408a774883668bffbc5ae Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 16:18:52 +0000 Subject: [PATCH 11/23] flake.nix: use flake-parts --- .devops/nix/apps.nix | 34 ++++--- .devops/nix/devshells.nix | 21 ++-- .devops/nix/nixpkgs-instances.nix | 35 +++++++ flake.lock | 37 +++++++ flake.nix | 164 ++++++++++++------------------ 5 files changed, 172 insertions(+), 119 deletions(-) create mode 100644 .devops/nix/nixpkgs-instances.nix diff --git a/.devops/nix/apps.nix b/.devops/nix/apps.nix index d9b6a1e000628..b8a12cc0a0463 100644 --- a/.devops/nix/apps.nix +++ b/.devops/nix/apps.nix @@ -1,14 +1,22 @@ -{ package, binaries }: - -let - default = builtins.elemAt binaries 0; - mkApp = name: { - ${name} = { - type = "app"; - program = "${package}/bin/${name}"; +{ + perSystem = + { config, lib, ... }: + { + apps = + let + inherit (config.packages) default; + binaries = [ + "llama" + "llama-embedding" + "llama-server" + "quantize" + "train-text-from-scratch" + ]; + mkApp = name: { + type = "app"; + program = "${default}/bin/${name}"; + }; + in + lib.genAttrs binaries mkApp; }; - }; - result = builtins.foldl' (acc: name: (mkApp name) // acc) { } binaries; -in - -result // { default = result.${default}; } +} diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix index afaaa2644059b..1862f0f085100 100644 --- a/.devops/nix/devshells.nix +++ b/.devops/nix/devshells.nix @@ -1,8 +1,13 @@ -{ concatMapAttrs, packages }: - -concatMapAttrs - (name: package: { - ${name} = package.passthru.shell; - ${name + "-extra"} = package.passthru.shell-extra; - }) - packages +{ + perSystem = + { config, lib, ... 
}: + { + devShells = + lib.concatMapAttrs + (name: package: { + ${name} = package.passthru.shell; + ${name + "-extra"} = package.passthru.shell-extra; + }) + config.packages; + }; +} diff --git a/.devops/nix/nixpkgs-instances.nix b/.devops/nix/nixpkgs-instances.nix new file mode 100644 index 0000000000000..6e9872b28c8fb --- /dev/null +++ b/.devops/nix/nixpkgs-instances.nix @@ -0,0 +1,35 @@ +{ inputs, ... }: +{ + # The _module.args definitions are passed on to modules as arguments. E.g. + # the module `{ pkgs ... }: { /* config */ }` implicitly uses + # `_module.args.pkgs` (defined in this case by flake-parts). + perSystem = + { system, ... }: + { + _module.args = { + pkgsCuda = import inputs.nixpkgs { + inherit system; + # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, + # and ucx are built with CUDA support) + config.cudaSupport = true; + config.allowUnfreePredicate = + p: + builtins.all + ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) + (p.meta.licenses or [ p.meta.license ]); + }; + # Ensure dependencies use ROCm consistently + pkgsRocm = import inputs.nixpkgs { + inherit system; + config.rocmSupport = true; + }; + }; + }; +} diff --git a/flake.lock b/flake.lock index 656792f21cbf9..3fcd1f45d5a41 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,23 @@ { "nodes": { + "flake-parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1701473968, + "narHash": "sha256-YcVE5emp1qQ8ieHUnxt1wCZCC3ZfAS+SRRWZ2TMda7E=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "34fed993f1674c8d06d58b37ce1e0fe5eebcb9f5", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, "nixpkgs": { "locked": { "lastModified": 1703559957, @@ -16,8 +34,27 @@ "type": "github" } }, + "nixpkgs-lib": { + "locked": { + "dir": "lib", + "lastModified": 1701253981, + "narHash": "sha256-ztaDIyZ7HrTAfEEUt9AtTDNoCYxUdSd6NrRHaYOIxtk=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "e92039b55bcd58469325ded85d4f58dd5a4eaf58", + "type": "github" + }, + "original": { + "dir": "lib", + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, "root": { "inputs": { + "flake-parts": "flake-parts", "nixpkgs": "nixpkgs" } } diff --git a/flake.nix b/flake.nix index f837f47cf386c..ff610ec64d53b 100644 --- a/flake.nix +++ b/flake.nix @@ -1,111 +1,79 @@ { + description = "Port of Facebook's LLaMA model in C/C++"; + inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-parts.url = "github:hercules-ci/flake-parts"; }; + # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl: + # + # ```bash + # ❯ nix repl + # nix-repl> :lf github:ggerganov/llama.cpp + # Added 13 variables. + # nix-repl> outputs.apps.x86_64-linux.quantize + # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; } + # ``` outputs = - { self, nixpkgs }: - - let - systems = [ - "aarch64-darwin" - "aarch64-linux" - "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) - "x86_64-linux" - ]; - eachSystem = f: nixpkgs.lib.genAttrs systems (system: f system); - in + { flake-parts, ... }@inputs: + flake-parts.lib.mkFlake { inherit inputs; } - { - # An overlay can be used to have a more granular control over llama-cpp's - # dependencies and configuration, than that offered by the `.override` - # mechanism. Cf. 
https://nixos.org/manual/nixpkgs/stable/#chap-overlays. - # - # E.g. in a flake: - # ``` - # { nixpkgs, llama-cpp, ... }: - # let pkgs = import nixpkgs { - # overlays = [ (llama-cpp.overlays.default) ]; - # system = "aarch64-linux"; - # config.allowUnfree = true; - # config.cudaSupport = true; - # config.cudaCapabilities = [ "7.2" ]; - # config.cudaEnableForwardCompat = false; - # }; in { - # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; - # } - # ``` - # - # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format - overlays.default = (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); + { - # These use the package definition from `./.devops/nix/package.nix`. - # There's one per backend that llama-cpp uses. Add more as needed! - packages = eachSystem ( - system: - let - # Avoid re-evaluation for the nixpkgs instance, - # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs - pkgs = nixpkgs.legacyPackages.${system}; + imports = [ + .devops/nix/nixpkgs-instances.nix + .devops/nix/apps.nix + .devops/nix/devshells.nix + ]; - # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, - # and ucx are built with CUDA support) - pkgsCuda = import nixpkgs { - inherit system; + # An overlay can be used to have a more granular control over llama-cpp's + # dependencies and configuration, than that offered by the `.override` + # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays. + # + # E.g. in a flake: + # ``` + # { nixpkgs, llama-cpp, ... }: + # let pkgs = import nixpkgs { + # overlays = [ (llama-cpp.overlays.default) ]; + # system = "aarch64-linux"; + # config.allowUnfree = true; + # config.cudaSupport = true; + # config.cudaCapabilities = [ "7.2" ]; + # config.cudaEnableForwardCompat = false; + # }; in { + # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; + # } + # ``` + # + # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format + flake.overlays.default = + (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); - config.cudaSupport = true; - config.allowUnfreePredicate = - p: - builtins.all - ( - license: - license.free - || builtins.elem license.shortName [ - "CUDA EULA" - "cuDNN EULA" - ] - ) - (p.meta.licenses or [ p.meta.license ]); - }; + systems = [ + "aarch64-darwin" + "aarch64-linux" + "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) + "x86_64-linux" + ]; - # Ensure dependencies use ROCm consistently - pkgsRocm = import nixpkgs { - inherit system; - config.rocmSupport = true; + perSystem = + { + config, + pkgs, + pkgsCuda, + pkgsRocm, + ... + }: + { + # We don't use the overlay here so as to avoid making too many instances of nixpkgs, + # cf. 
https://zimbatm.com/notes/1000-instances-of-nixpkgs + packages = { + default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; + opencl = config.packages.default.override { useOpenCL = true; }; + cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; + rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; + }; }; - in - { - default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; - opencl = self.packages.${system}.default.override { useOpenCL = true; }; - cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; - rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; - } - ); - - # These use the definition of llama-cpp from `./.devops/nix/package.nix` - # and expose various binaries as apps with `nix run .#app-name`. - # Note that none of these apps use anything other than the default backend. - apps = eachSystem ( - system: - import ./.devops/nix/apps.nix { - package = self.packages.${system}.default; - binaries = [ - "llama" - "llama-embedding" - "llama-server" - "quantize" - "train-text-from-scratch" - ]; - } - ); - - # These expose a build environment for either a "default" or an "extra" set of dependencies. - devShells = eachSystem ( - system: - import ./.devops/nix/devshells.nix { - concatMapAttrs = nixpkgs.lib.concatMapAttrs; - packages = self.packages.${system}; - } - ); - }; + }; } From a629371245a292f047c2bca9d2c8e9034338130a Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:02:36 +0000 Subject: [PATCH 12/23] nix: migrate to pname+version --- .devops/nix/package.nix | 33 +++++++++++++++++++-------------- .devops/nix/scope.nix | 13 +++++++++++-- flake.nix | 15 ++++++++++----- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index c6d03b4a480e7..8fe250651fdd3 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -24,6 +24,7 @@ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, useOpenCL ? false, useRocm ? config.rocmSupport, + llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake }@inputs: let @@ -31,6 +32,7 @@ let cmakeBool cmakeFeature optionals + strings versionOlder ; @@ -39,18 +41,19 @@ let stdenv = throw "Use effectiveStdenv instead"; effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv; - # Give a little description difference between the flavors. + suffices = + lib.optionals useOpenCL [ "OpenCL" ] + ++ lib.optionals useCuda [ "CUDA" ] + ++ lib.optionals useRocm [ "ROCm" ] + ++ lib.optionals useMetalKit [ "MetalKit" ] + ++ lib.optionals useBlas [ "BLAS" ]; + + pnameSuffix = + strings.optionalString (suffices != [ ]) + "-${strings.concatMapStringsSep "-" strings.toLower suffices}"; descriptionSuffix = - if useOpenCL then - " (OpenCL accelerated)" - else if useCuda then - " (CUDA accelerated)" - else if useRocm then - " (ROCm accelerated)" - else if useMetalKit then - " (MetalKit accelerated)" - else - ""; + strings.optionalString (suffices != [ ]) + ", accelerated with ${strings.concatStringsSep ", " suffices}"; # TODO: package the Python in this repository in a Nix-like way. 
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo @@ -99,7 +102,9 @@ in effectiveStdenv.mkDerivation ( finalAttrs: { - name = "llama.cpp"; + pname = "llama-cpp${pnameSuffix}"; + version = llamaVersion; + src = ../../.; postPatch = '' @@ -171,14 +176,14 @@ effectiveStdenv.mkDerivation ( ; shell = mkShell { - name = "default${descriptionSuffix}"; + name = "shell-${finalAttrs.finalPackage.name}"; description = "contains numpy and sentencepiece"; buildInputs = [ llama-python ]; inputsFrom = [ finalAttrs.finalPackage ]; }; shell-extra = mkShell { - name = "extra${descriptionSuffix}"; + name = "shell-extra-${finalAttrs.finalPackage.name}"; description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; buildInputs = [ llama-python-extra ]; inputsFrom = [ finalAttrs.finalPackage ]; diff --git a/.devops/nix/scope.nix b/.devops/nix/scope.nix index 78e6a126d3b00..7932ac1e8a910 100644 --- a/.devops/nix/scope.nix +++ b/.devops/nix/scope.nix @@ -1,3 +1,12 @@ -{ lib, newScope }: +{ + lib, + newScope, + llamaVersion ? "0.0.0", +}: -lib.makeScope newScope (self: { llama-cpp = self.callPackage ./package.nix { }; }) +lib.makeScope newScope ( + self: { + inherit llamaVersion; + llama-cpp = self.callPackage ./package.nix { }; + } +) diff --git a/flake.nix b/flake.nix index ff610ec64d53b..a7c2b58f5d0da 100644 --- a/flake.nix +++ b/flake.nix @@ -16,7 +16,10 @@ # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; } # ``` outputs = - { flake-parts, ... }@inputs: + { self, flake-parts, ... }@inputs: + let + llamaVersion = self.dirtyShortRev or self.shortRev; + in flake-parts.lib.mkFlake { inherit inputs; } { @@ -48,7 +51,9 @@ # # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format flake.overlays.default = - (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); + (final: prev: { + llamaPackages = final.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; + }); systems = [ "aarch64-darwin" @@ -69,10 +74,10 @@ # We don't use the overlay here so as to avoid making too many instances of nixpkgs, # cf. 
https://zimbatm.com/notes/1000-instances-of-nixpkgs packages = { - default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; + default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; opencl = config.packages.default.override { useOpenCL = true; }; - cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; - rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; + cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; + rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; }; }; }; From e3b1ba27c21deec131d8082e213bd54372f799ee Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:03:19 +0000 Subject: [PATCH 13/23] flake.nix: overlay: expose both the namespace and the default attribute --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index a7c2b58f5d0da..7c7440fc92970 100644 --- a/flake.nix +++ b/flake.nix @@ -53,6 +53,7 @@ flake.overlays.default = (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; + inherit (final.llamaPackages) llama-cpp; }); systems = [ From 12d4a68efedbbd88df3a5fd9889e554059f9b860 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:05:21 +0000 Subject: [PATCH 14/23] ci: add the (Nix) flakestry workflow --- .github/workflows/nix-flakestry.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/nix-flakestry.yml diff --git a/.github/workflows/nix-flakestry.yml b/.github/workflows/nix-flakestry.yml new file mode 100644 index 0000000000000..3abfb3509a648 --- /dev/null +++ b/.github/workflows/nix-flakestry.yml @@ -0,0 +1,23 @@ +# Make the flake discoverable on https://flakestry.dev +name: "Publish a flake to flakestry" +on: + push: + tags: + - "v?[0-9]+.[0-9]+.[0-9]+" + - "v?[0-9]+.[0-9]+" + workflow_dispatch: + inputs: + tag: + description: "The existing tag to publish" + type: "string" + required: true +jobs: + publish-flake: + runs-on: ubuntu-latest + permissions: + id-token: "write" + contents: "read" + steps: + - uses: flakestry/flakestry-publish@main + with: + version: "${{ inputs.tag || github.ref_name }}" From a16f58997c0e70eb97139af55444264aa83f4f78 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 03:58:02 +0000 Subject: [PATCH 15/23] nix: cmakeFlags: explicit OFF bools --- .devops/nix/package.nix | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 8fe250651fdd3..8d07508a1e538 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -137,13 +137,13 @@ effectiveStdenv.mkDerivation ( (cmakeBool "LLAMA_BUILD_SERVER" true) (cmakeBool "BUILD_SHARED_LIBS" true) (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) - (cmakeBool "LLAMA_METAL" useMetalKit) (cmakeBool "LLAMA_BLAS" useBlas) + (cmakeBool "LLAMA_CLBLAST" useOpenCL) + (cmakeBool "LLAMA_CUBLAS" useCuda) + (cmakeBool "LLAMA_HIPBLAS" useRocm) + (cmakeBool "LLAMA_METAL" useMetalKit) ] - ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] - ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] ++ optionals useRocm [ - (cmakeBool "LLAMA_HIPBLAS" true) (cmakeFeature "CMAKE_C_COMPILER" "hipcc") (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") From dd0e12c7410ac7e163c6d9b0dbcb9fe8ce3070c9 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 04:02:11 +0000 Subject: [PATCH 16/23] nix: cuda: reduce runtime closure 
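Referencing the split outputs (`.dev`, `.lib`, `.static`) of cuda_cudart and
libcublas directly keeps the default, lndir-symlinked outputs -- and
everything they retain -- out of the runtime closure.

A minimal sketch of the same multiple-outputs idea, using openssl as a
freely-evaluable stand-in for the CUDA packages (the file name and command
are illustrative assumptions):

```nix
# sketch.nix -- evaluate with e.g. `nix-instantiate --eval -A runtimeDep sketch.nix`
let
  pkgs = import <nixpkgs> { };
in
{
  # Depending on `.out` (shared libraries) instead of the `.dev` output
  # (headers, pkg-config files) is what keeps a runtime closure small;
  # both attributes evaluate to distinct store paths.
  runtimeDep = pkgs.openssl.out.outPath;
  buildTimeDep = pkgs.openssl.dev.outPath;
}
```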
--- .devops/nix/package.nix | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 8d07508a1e538..0e10ba61e72ec 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -89,8 +89,15 @@ let cudaBuildInputs = with cudaPackages; [ cuda_cccl.dev # - cuda_cudart - libcublas + + # A temporary hack for reducing the closure size, remove once cudaPackages + # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792 + cuda_cudart.dev + cuda_cudart.lib + cuda_cudart.static + libcublas.dev + libcublas.lib + libcublas.static ]; rocmBuildInputs = with rocmPackages; [ From 4522c47a2282a595344ba8dcb85222910b2ffc4f Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 04:05:51 +0000 Subject: [PATCH 17/23] nix: fewer rebuilds --- .devops/nix/package.nix | 2 +- flake.nix | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 0e10ba61e72ec..3222ec4695622 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -112,7 +112,7 @@ effectiveStdenv.mkDerivation ( pname = "llama-cpp${pnameSuffix}"; version = llamaVersion; - src = ../../.; + src = lib.cleanSource ../../.; postPatch = '' substituteInPlace ./ggml-metal.m \ diff --git a/flake.nix b/flake.nix index 7c7440fc92970..3575cbf12607a 100644 --- a/flake.nix +++ b/flake.nix @@ -18,7 +18,13 @@ outputs = { self, flake-parts, ... }@inputs: let - llamaVersion = self.dirtyShortRev or self.shortRev; + # We could include the git revisions in the package names but those would + # needlessly trigger rebuilds: + # llamaVersion = self.dirtyShortRev or self.shortRev; + + # Nix already uses cryptographic hashes for versioning, so we'll just fix + # the fake semver for now: + llamaVersion = "0.0.0"; in flake-parts.lib.mkFlake { inherit inputs; } From ae6bebccb11b17ebeca2809e00e204f45644c76d Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 04:33:24 +0000 Subject: [PATCH 18/23] nix: respect config.cudaCapabilities --- .devops/nix/package.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 3222ec4695622..cb1e8f48045c8 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -150,6 +150,14 @@ effectiveStdenv.mkDerivation ( (cmakeBool "LLAMA_HIPBLAS" useRocm) (cmakeBool "LLAMA_METAL" useMetalKit) ] + ++ optionals useCuda [ + ( + with cudaPackages.flags; + cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( + builtins.concatStringsSep ";" (map dropDot cudaCapabilities) + ) + ) + ] ++ optionals useRocm [ (cmakeFeature "CMAKE_C_COMPILER" "hipcc") (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") From 1efbc6b0643751480a879d94600b589b95e82e21 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 17:26:22 +0000 Subject: [PATCH 19/23] nix: add the impure driver's location to the DT_RUNPATHs --- .devops/nix/package.nix | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index cb1e8f48045c8..c54a7c3c63e95 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -124,12 +124,20 @@ effectiveStdenv.mkDerivation ( substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" ''; - nativeBuildInputs = [ - cmake - ninja - pkg-config - git - ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ]; + nativeBuildInputs = + [ + cmake + ninja + pkg-config + git + ] + ++ optionals 
---
 .devops/nix/package.nix | 20 ++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index cb1e8f48045c8..c54a7c3c63e95 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -124,12 +124,20 @@ effectiveStdenv.mkDerivation (
       substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
     '';
 
-    nativeBuildInputs = [
-      cmake
-      ninja
-      pkg-config
-      git
-    ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ];
+    nativeBuildInputs =
+      [
+        cmake
+        ninja
+        pkg-config
+        git
+      ]
+      ++ optionals useCuda [
+        cudaPackages.cuda_nvcc
+
+        # TODO: Replace with autoAddDriverRunpath
+        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
+        cudaPackages.autoAddOpenGLRunpathHook
+      ];
 
     buildInputs =
       [ mpi ]

From 82e48e256725f4865dd92a307786b4713e551af7 Mon Sep 17 00:00:00 2001
From: Someone Serge
Date: Tue, 26 Dec 2023 22:20:07 +0000
Subject: [PATCH 20/23] nix: clean sources more thoroughly

...this way outPaths change less frequently, and so there are fewer rebuilds
---
 .devops/nix/package.nix | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index c54a7c3c63e95..2d00994577263 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -112,7 +112,16 @@ effectiveStdenv.mkDerivation (
     pname = "llama-cpp${pnameSuffix}";
     version = llamaVersion;
 
-    src = lib.cleanSource ../../.;
+    src = lib.cleanSourceWith {
+      filter =
+        name: type:
+        !(builtins.any (_: _) [
+          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+          (baseNameOf name == "README.md") # Ignore README.md when computing outPaths
+          (lib.hasPrefix "." (baseNameOf name)) # Skip hidden files and directories
+        ]);
+      src = lib.cleanSource ../../.;
+    };
 
     postPatch = ''
       substituteInPlace ./ggml-metal.m \

From 7bd8d8c6d7de67975fbb9681990d4d4af5b6bbab Mon Sep 17 00:00:00 2001
From: Someone Serge
Date: Tue, 26 Dec 2023 22:23:30 +0000
Subject: [PATCH 21/23] nix: explicit mpi support
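
OpenMPI adds roughly 700M to the runtime closure, so MPI support becomes
an explicit, off-by-default flag rather than an unconditional dependency.
The flake exposes ready-made variants (e.g. `nix build .#mpi-cpu`), and
consumers of the overlay can opt in through the usual override mechanism;
a minimal sketch, assuming the overlay from this flake is already applied:

    # In a downstream overlay: an MPI-enabled build of the CPU backend.
    final: prev: {
      llama-cpp-mpi = final.llamaPackages.llama-cpp.override { useMpi = true; };
    }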
"0.0.0", # Arbitrary version, substituted by the flake @@ -42,11 +43,12 @@ let effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv; suffices = - lib.optionals useOpenCL [ "OpenCL" ] + lib.optionals useBlas [ "BLAS" ] ++ lib.optionals useCuda [ "CUDA" ] - ++ lib.optionals useRocm [ "ROCm" ] ++ lib.optionals useMetalKit [ "MetalKit" ] - ++ lib.optionals useBlas [ "BLAS" ]; + ++ lib.optionals useMpi [ "MPI" ] + ++ lib.optionals useOpenCL [ "OpenCL" ] + ++ lib.optionals useRocm [ "ROCm" ]; pnameSuffix = strings.optionalString (suffices != [ ]) @@ -149,11 +151,11 @@ effectiveStdenv.mkDerivation ( ]; buildInputs = - [ mpi ] - ++ optionals useOpenCL [ clblast ] + optionals effectiveStdenv.isDarwin darwinBuildInputs ++ optionals useCuda cudaBuildInputs - ++ optionals useRocm rocmBuildInputs - ++ optionals effectiveStdenv.isDarwin darwinBuildInputs; + ++ optionals useMpi [ mpi ] + ++ optionals useOpenCL [ clblast ] + ++ optionals useRocm rocmBuildInputs; cmakeFlags = [ @@ -166,6 +168,7 @@ effectiveStdenv.mkDerivation ( (cmakeBool "LLAMA_CUBLAS" useCuda) (cmakeBool "LLAMA_HIPBLAS" useRocm) (cmakeBool "LLAMA_METAL" useMetalKit) + (cmakeBool "LLAMA_MPI" useMpi) ] ++ optionals useCuda [ ( @@ -203,6 +206,7 @@ effectiveStdenv.mkDerivation ( useBlas useCuda useMetalKit + useMpi useOpenCL useRocm ; diff --git a/flake.nix b/flake.nix index 3575cbf12607a..d240ececad7bd 100644 --- a/flake.nix +++ b/flake.nix @@ -85,6 +85,9 @@ opencl = config.packages.default.override { useOpenCL = true; }; cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; + + mpi-cpu = config.packages.default.override { useMpi = true; }; + mpi-cuda = config.packages.default.override { useMpi = true; }; }; }; }; From d0adab60d5d20bf0db25bc026fe5c7790d47f5ef Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 20:04:49 +0000 Subject: [PATCH 22/23] nix: explicit jetson support --- .devops/nix/jetson-support.nix | 32 ++++++++++++++++++++++++++++++++ flake.nix | 1 + 2 files changed, 33 insertions(+) create mode 100644 .devops/nix/jetson-support.nix diff --git a/.devops/nix/jetson-support.nix b/.devops/nix/jetson-support.nix new file mode 100644 index 0000000000000..08426d2abb7ec --- /dev/null +++ b/.devops/nix/jetson-support.nix @@ -0,0 +1,32 @@ +{ inputs, ... }: +{ + perSystem = + { + config, + system, + lib, + pkgsCuda, + ... 
---
 .devops/nix/jetson-support.nix | 32 ++++++++++++++++++++++++++++++++
 flake.nix                      |  1 +
 2 files changed, 33 insertions(+)
 create mode 100644 .devops/nix/jetson-support.nix

diff --git a/.devops/nix/jetson-support.nix b/.devops/nix/jetson-support.nix
new file mode 100644
index 0000000000000..08426d2abb7ec
--- /dev/null
+++ b/.devops/nix/jetson-support.nix
@@ -0,0 +1,32 @@
+{ inputs, ... }:
+{
+  perSystem =
+    {
+      config,
+      system,
+      lib,
+      pkgsCuda,
+      ...
+    }:
+    lib.optionalAttrs (system == "aarch64-linux") {
+      packages =
+        let
+          caps.jetson-xavier = "7.2";
+          caps.jetson-orin = "8.7";
+          caps.jetson-nano = "5.3";
+
+          pkgsFor =
+            cap:
+            import inputs.nixpkgs {
+              inherit system;
+              config = {
+                cudaSupport = true;
+                cudaCapabilities = [ cap ];
+                cudaEnableForwardCompat = false;
+                inherit (pkgsCuda.config) allowUnfreePredicate;
+              };
+            };
+        in
+        builtins.mapAttrs (name: cap: ((pkgsFor cap).callPackage ./scope.nix { }).llama-cpp) caps;
+    };
+}
diff --git a/flake.nix b/flake.nix
index d240ececad7bd..b0a6abd3cd611 100644
--- a/flake.nix
+++ b/flake.nix
@@ -34,6 +34,7 @@
         .devops/nix/nixpkgs-instances.nix
         .devops/nix/apps.nix
         .devops/nix/devshells.nix
+        .devops/nix/jetson-support.nix
       ];
 
       # An overlay can be used to have a more granular control over llama-cpp's

From 3f7003b4bb6e9314ba4f65770dfdcea08a540e7b Mon Sep 17 00:00:00 2001
From: Someone Serge
Date: Tue, 26 Dec 2023 22:41:53 +0000
Subject: [PATCH 23/23] flake.nix: darwin: only expose the default

---
 flake.nix | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/flake.nix b/flake.nix
index b0a6abd3cd611..2209070aa83cd 100644
--- a/flake.nix
+++ b/flake.nix
@@ -73,6 +73,7 @@
       perSystem =
         {
           config,
+          lib,
           pkgs,
           pkgsCuda,
           pkgsRocm,
@@ -81,15 +82,18 @@
         {
           # We don't use the overlay here so as to avoid making too many instances of nixpkgs,
           # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs
-          packages = {
-            default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
-            opencl = config.packages.default.override { useOpenCL = true; };
-            cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
-            rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
-            mpi-cpu = config.packages.default.override { useMpi = true; };
-            mpi-cuda = config.packages.cuda.override { useMpi = true; };
-          };
+          packages =
+            {
+              default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+            }
+            // lib.optionalAttrs pkgs.stdenv.isLinux {
+              opencl = config.packages.default.override { useOpenCL = true; };
+              cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+              rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+
+              mpi-cpu = config.packages.default.override { useMpi = true; };
+              mpi-cuda = config.packages.cuda.override { useMpi = true; };
+            };
         };
     };
 }