From d3e73df66af43ad51706e78c5ad820767d677405 Mon Sep 17 00:00:00 2001
From: Someone Serge
Date: Tue, 26 Dec 2023 03:09:04 +0000
Subject: [PATCH 01/23] flake.lock: update to hotfix CUDA::cuda_driver

Required to support https://github.com/ggerganov/llama.cpp/pull/4606
---
 flake.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flake.lock b/flake.lock
index 0455f65617a2d..0b9c9768b9d42 100644
--- a/flake.lock
+++ b/flake.lock
@@ -20,11 +20,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1698318101,
-        "narHash": "sha256-gUihHt3yPD7bVqg+k/UVHgngyaJ3DMEBchbymBMvK1E=",
+        "lastModified": 1703559957,
+        "narHash": "sha256-x9PUuMEPGUOMB51zNxrDr2QoHbYWlCS2xhFedm9MC5Q=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "63678e9f3d3afecfeafa0acead6239cdb447574c",
+        "rev": "75dd68c36f458c6593c5bbb48abfd3e59bfed380",
         "type": "github"
       },
       "original": {

From 8364cf4d0b4461f7ab8d6d3319688f2ab5b2ac32 Mon Sep 17 00:00:00 2001
From: Philip Taron
Date: Fri, 22 Dec 2023 12:33:09 -0800
Subject: [PATCH 02/23] flake.nix: rewrite

1. Split into separate files per output.

2. Added overlays, so that this flake can be integrated into others.
   The names in the overlay are `llama-cpp`, `llama-cpp-opencl`,
   `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the
   broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs).

3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/)
   rather than `with pkgs;` so that there's dependency injection rather
   than dependency lookup.

4. Add a description and meta information for each package.
   The description includes a bit about what's used to accelerate each one.

5. Use specific CUDA packages instead of cudatoolkit on the advice of SomeoneSerge.

6. Format with `serokell/nixfmt` for a consistent style.

7. Update `flake.lock` with the latest goods.
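As a usage sketch (illustrative, not part of this patch): a downstream flake
might pick up the overlay like so. The input name `llama-cpp` and the
`x86_64-linux` system below are assumptions made for the example; the
`overlays.default` output and the `llama-cpp` attribute are what this change
actually defines.

```nix
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    llama-cpp.url = "github:ggerganov/llama.cpp";
  };

  outputs = { self, nixpkgs, llama-cpp }: {
    packages.x86_64-linux.default =
      let
        pkgs = import nixpkgs {
          system = "x86_64-linux";
          # The overlay injects the `llama-cpp` attribute into the package set.
          overlays = [ llama-cpp.overlays.default ];
        };
      in
      pkgs.llama-cpp;
  };
}
```

Because the package is instantiated with `callPackage`, consumers can also
`.override` individual dependencies of `pkgs.llama-cpp` without editing this
repository.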
--- .devops/nix/apps.nix | 14 +++ .devops/nix/devshells.nix | 10 ++ .devops/nix/overlay.nix | 17 +++ .devops/nix/package.nix | 182 +++++++++++++++++++++++++++++++ flake.lock | 34 ------ flake.nix | 220 +++++++++++++++----------------------- 6 files changed, 310 insertions(+), 167 deletions(-) create mode 100644 .devops/nix/apps.nix create mode 100644 .devops/nix/devshells.nix create mode 100644 .devops/nix/overlay.nix create mode 100644 .devops/nix/package.nix diff --git a/.devops/nix/apps.nix b/.devops/nix/apps.nix new file mode 100644 index 0000000000000..d9b6a1e000628 --- /dev/null +++ b/.devops/nix/apps.nix @@ -0,0 +1,14 @@ +{ package, binaries }: + +let + default = builtins.elemAt binaries 0; + mkApp = name: { + ${name} = { + type = "app"; + program = "${package}/bin/${name}"; + }; + }; + result = builtins.foldl' (acc: name: (mkApp name) // acc) { } binaries; +in + +result // { default = result.${default}; } diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix new file mode 100644 index 0000000000000..f8d541f3068a5 --- /dev/null +++ b/.devops/nix/devshells.nix @@ -0,0 +1,10 @@ +{ concatMapAttrs, packages }: + +concatMapAttrs + (name: package: { + ${name} = package.passthru.shell.overrideAttrs (prevAttrs: { inputsFrom = [ package ]; }); + ${name + "-extra"} = package.passthru.shell-extra.overrideAttrs ( + prevAttrs: { inputsFrom = [ package ]; } + ); + }) + packages diff --git a/.devops/nix/overlay.nix b/.devops/nix/overlay.nix new file mode 100644 index 0000000000000..e5fede7740641 --- /dev/null +++ b/.devops/nix/overlay.nix @@ -0,0 +1,17 @@ +final: prev: + +let + inherit (final.stdenv) isAarch64 isDarwin; + + darwinSpecific = + if isAarch64 then + { inherit (final.darwin.apple_sdk_11_0.frameworks) Accelerate MetalKit; } + else + { inherit (final.darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo; }; + + osSpecific = if isDarwin then darwinSpecific else { }; +in + +{ + llama-cpp = final.callPackage ./package.nix osSpecific; +} diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix new file mode 100644 index 0000000000000..460a32e47b1f0 --- /dev/null +++ b/.devops/nix/package.nix @@ -0,0 +1,182 @@ +{ + lib, + config, + stdenv, + mkShell, + cmake, + ninja, + pkg-config, + git, + python3, + mpi, + openblas, # TODO: Use the generic `blas` so users could switch betwen alternative implementations + cudaPackages, + rocmPackages, + clblast, + Accelerate ? null, + MetalKit ? null, + CoreVideo ? null, + CoreGraphics ? null, + useOpenCL ? false, + useCuda ? config.cudaSupport, + useRocm ? config.rocmSupport, +}@inputs: + +let + inherit (lib) + cmakeBool + cmakeFeature + optionals + versionOlder + ; + isDefault = !useOpenCL && !useCuda && !useRocm; + + # It's necessary to consistently use backendStdenv when building with CUDA support, + # otherwise we get libstdc++ errors downstream. + stdenv = throw "Use effectiveStdenv instead"; + effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv; + + # Give a little description difference between the flavors. + descriptionSuffix = + if useOpenCL then + " (OpenCL accelerated)" + else if useCuda then + " (CUDA accelerated)" + else if useRocm then + " (ROCm accelerated)" + else if (MetalKit != null) then + " (MetalKit accelerated)" + else + ""; + + # TODO: package the Python in this repository in a Nix-like way. + # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo + # is PEP 517-compatible, and ensure the correct .dist-info is generated. 
+ # https://peps.python.org/pep-0517/ + llama-python = python3.withPackages ( + ps: [ + ps.numpy + ps.sentencepiece + ] + ); + + # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime + llama-python-extra = python3.withPackages ( + ps: [ + ps.numpy + ps.sentencepiece + ps.torchWithoutCuda + ps.transformers + ] + ); + + # See ./overlay.nix for where these dependencies are passed in. + defaultBuildInputs = builtins.filter (p: p != null) [ + Accelerate + MetalKit + CoreVideo + CoreGraphics + ]; + + cudaBuildInputs = with cudaPackages; [ + cuda_cccl.dev # + cuda_cudart + libcublas + ]; + + rocmBuildInputs = with rocmPackages; [ + clr + hipblas + rocblas + ]; +in + +effectiveStdenv.mkDerivation { + name = "llama.cpp"; + src = ../../.; + meta = { + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + mainProgram = "llama"; + }; + + postPatch = '' + substituteInPlace ./ggml-metal.m \ + --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" + + # TODO: Package up each Python script or service appropriately. + # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, + # we could make those *.py into setuptools' entrypoints + substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" + ''; + + nativeBuildInputs = [ + cmake + ninja + pkg-config + git + ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ]; + + buildInputs = + [ mpi ] + ++ optionals useOpenCL [ clblast ] + ++ optionals useCuda cudaBuildInputs + ++ optionals useRocm rocmBuildInputs + ++ optionals isDefault defaultBuildInputs; + + cmakeFlags = + [ + (cmakeBool "LLAMA_NATIVE" true) + (cmakeBool "LLAMA_BUILD_SERVER" true) + (cmakeBool "BUILD_SHARED_LIBS" true) + (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + ] + ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] + ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] + ++ optionals useRocm [ + (cmakeBool "LLAMA_HIPBLAS" true) + (cmakeFeature "CMAKE_C_COMPILER" "hipcc") + (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + + # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM + # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt + # and select the line that matches the current nixpkgs version of rocBLAS. + # Should likely use `rocmPackages.clr.gpuTargets`. + "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" + ] + ++ optionals isDefault ( + if (MetalKit != null) then + [ + "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" + "-DLLAMA_METAL=ON" + ] + else + [ + "-DLLAMA_BLAS=ON" + "-DLLAMA_BLAS_VENDOR=OpenBLAS" + ] + ); + + # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, + # if they haven't been added yet. + postInstall = '' + mv $out/bin/main $out/bin/llama + mv $out/bin/server $out/bin/llama-server + mkdir -p $out/include + cp $src/llama.h $out/include/ + ''; + + # Define the shells here, but don't add in the inputsFrom to avoid recursion. 
+ passthru = { + shell = mkShell { + name = "default${descriptionSuffix}"; + description = "contains numpy and sentencepiece"; + buildInputs = [ llama-python ]; + }; + + shell-extra = mkShell { + name = "extra${descriptionSuffix}"; + description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; + buildInputs = [ llama-python-extra ]; + }; + }; +} diff --git a/flake.lock b/flake.lock index 0b9c9768b9d42..656792f21cbf9 100644 --- a/flake.lock +++ b/flake.lock @@ -1,23 +1,5 @@ { "nodes": { - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1694529238, - "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "nixpkgs": { "locked": { "lastModified": 1703559957, @@ -36,24 +18,8 @@ }, "root": { "inputs": { - "flake-utils": "flake-utils", "nixpkgs": "nixpkgs" } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 4cf28d5c11c0f..dcf8e1d9defa0 100644 --- a/flake.nix +++ b/flake.nix @@ -1,139 +1,93 @@ { inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - flake-utils.url = "github:numtide/flake-utils"; }; - outputs = { self, nixpkgs, flake-utils }: - flake-utils.lib.eachDefaultSystem (system: - let - name = "llama.cpp"; - src = ./.; - meta.mainProgram = "llama"; - inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin; - buildInputs = with pkgs; [ openmpi ]; - osSpecific = with pkgs; buildInputs ++ ( - if isAarch64 && isDarwin then - with pkgs.darwin.apple_sdk_11_0.frameworks; [ - Accelerate - MetalKit - ] - else if isAarch32 && isDarwin then - with pkgs.darwin.apple_sdk.frameworks; [ - Accelerate - CoreGraphics - CoreVideo - ] - else if isDarwin then - with pkgs.darwin.apple_sdk.frameworks; [ - Accelerate - CoreGraphics - CoreVideo - ] - else - with pkgs; [ openblas ] - ); - pkgs = import nixpkgs { inherit system; }; - nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ]; - cudatoolkit_joined = with pkgs; symlinkJoin { - # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit - # see https://github.com/NixOS/nixpkgs/issues/224291 - # copied from jaxlib - name = "${cudaPackages.cudatoolkit.name}-merged"; - paths = [ - cudaPackages.cudatoolkit.lib - cudaPackages.cudatoolkit.out - ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [ - # for some reason some of the required libs are in the targets/x86_64-linux - # directory; not sure why but this works around it - "${cudaPackages.cudatoolkit}/targets/${system}" + + outputs = + { self, nixpkgs }: + + let + systems = [ + "aarch64-darwin" + "aarch64-linux" + "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) + "x86_64-linux" + ]; + eachSystem = f: nixpkgs.lib.genAttrs systems (system: f system); + in + + { + # These define the various ways to build the llama.cpp project. + # Integrate them into your flake.nix configuration by adding this overlay to nixpkgs.overlays. 
+ overlays.default = import ./.devops/nix/overlay.nix; + + # These use the package definition from `./.devops/nix/package.nix`. + # There's one per backend that llama-cpp uses. Add more as needed! + packages = eachSystem ( + system: + let + defaultConfig = { + inherit system; + overlays = [ self.overlays.default ]; + }; + pkgs = import nixpkgs defaultConfig; + + # Let's not make a big deal about getting the CUDA bits. + cudaConfig = defaultConfig // { + config.cudaSupport = true; + config.allowUnfreePredicate = + p: + builtins.all + ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) + (p.meta.licenses or [ p.meta.license ]); + }; + pkgsCuda = import nixpkgs cudaConfig; + + # Let's make sure to turn on ROCm support across the whole package ecosystem. + rocmConfig = defaultConfig // { + config.rocmSupport = true; + }; + pkgsRocm = import nixpkgs rocmConfig; + in + { + default = pkgs.llama-cpp; + opencl = pkgs.llama-cpp.override { useOpenCL = true; }; + cuda = pkgsCuda.llama-cpp; + rocm = pkgsRocm.llama-cpp; + } + ); + + # These use the definition of llama-cpp from `./.devops/nix/package.nix` + # and expose various binaries as apps with `nix run .#app-name`. + # Note that none of these apps use anything other than the default backend. + apps = eachSystem ( + system: + import ./.devops/nix/apps.nix { + package = self.packages.${system}.default; + binaries = [ + "llama" + "llama-embedding" + "llama-server" + "quantize" + "train-text-from-scratch" ]; - }; - llama-python = - pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]); - # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime - llama-python-extra = - pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]); - postPatch = '' - substituteInPlace ./ggml-metal.m \ - --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python' - ''; - postInstall = '' - mv $out/bin/main $out/bin/llama - mv $out/bin/server $out/bin/llama-server - mkdir -p $out/include - cp ${src}/llama.h $out/include/ - ''; - cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ]; - in - { - packages.default = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = osSpecific; - cmakeFlags = cmakeFlags - ++ (if isAarch64 && isDarwin then [ - "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" - "-DLLAMA_METAL=ON" - ] else [ - "-DLLAMA_BLAS=ON" - "-DLLAMA_BLAS_VENDOR=OpenBLAS" - ]); - }; - packages.opencl = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs; buildInputs ++ [ clblast ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_CLBLAST=ON" - ]; - }; - packages.cuda = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_CUBLAS=ON" - ]; - }; - packages.rocm = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_HIPBLAS=1" - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - # Build all targets supported by rocBLAS. 
When updating search for TARGET_LIST_ROCM - # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt - # and select the line that matches the current nixpkgs version of rocBLAS. - "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" - ]; - }; - apps.llama-server = { - type = "app"; - program = "${self.packages.${system}.default}/bin/llama-server"; - }; - apps.llama-embedding = { - type = "app"; - program = "${self.packages.${system}.default}/bin/embedding"; - }; - apps.llama = { - type = "app"; - program = "${self.packages.${system}.default}/bin/llama"; - }; - apps.quantize = { - type = "app"; - program = "${self.packages.${system}.default}/bin/quantize"; - }; - apps.train-text-from-scratch = { - type = "app"; - program = "${self.packages.${system}.default}/bin/train-text-from-scratch"; - }; - apps.default = self.apps.${system}.llama; - devShells.default = pkgs.mkShell { - buildInputs = [ llama-python ]; - packages = nativeBuildInputs ++ osSpecific; - }; - devShells.extra = pkgs.mkShell { - buildInputs = [ llama-python-extra ]; - packages = nativeBuildInputs ++ osSpecific; - }; - }); + } + ); + + # These expose a build environment for either a "default" or an "extra" set of dependencies. + devShells = eachSystem ( + system: + import ./.devops/nix/devshells.nix { + concatMapAttrs = nixpkgs.lib.concatMapAttrs; + packages = self.packages.${system}; + } + ); + }; } From 0607e24ec22caa316648015f6831c41619ffe9a0 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Sun, 24 Dec 2023 18:15:25 +0000 Subject: [PATCH 03/23] flake.nix: use finalPackage instead of passing it manually --- .devops/nix/devshells.nix | 6 ++---- .devops/nix/package.nix | 6 ++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix index f8d541f3068a5..afaaa2644059b 100644 --- a/.devops/nix/devshells.nix +++ b/.devops/nix/devshells.nix @@ -2,9 +2,7 @@ concatMapAttrs (name: package: { - ${name} = package.passthru.shell.overrideAttrs (prevAttrs: { inputsFrom = [ package ]; }); - ${name + "-extra"} = package.passthru.shell-extra.overrideAttrs ( - prevAttrs: { inputsFrom = [ package ]; } - ); + ${name} = package.passthru.shell; + ${name + "-extra"} = package.passthru.shell-extra; }) packages diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 460a32e47b1f0..bd2dbf4b2c4bd 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -91,7 +91,7 @@ let ]; in -effectiveStdenv.mkDerivation { +effectiveStdenv.mkDerivation (finalAttrs: { name = "llama.cpp"; src = ../../.; meta = { @@ -171,12 +171,14 @@ effectiveStdenv.mkDerivation { name = "default${descriptionSuffix}"; description = "contains numpy and sentencepiece"; buildInputs = [ llama-python ]; + inputsFrom = [ finalAttrs.finalPackage ]; }; shell-extra = mkShell { name = "extra${descriptionSuffix}"; description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; buildInputs = [ llama-python-extra ]; + inputsFrom = [ finalAttrs.finalPackage ]; }; }; -} +}) From eab1c125b9871553ccd2d0aea49f143f9051f581 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Sun, 24 Dec 2023 19:35:32 +0000 Subject: [PATCH 04/23] nix: unclutter darwin support --- .devops/nix/overlay.nix | 14 +---------- .devops/nix/package.nix | 52 ++++++++++++++++++++--------------------- 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/.devops/nix/overlay.nix 
b/.devops/nix/overlay.nix index e5fede7740641..c7baec8434fa4 100644 --- a/.devops/nix/overlay.nix +++ b/.devops/nix/overlay.nix @@ -1,17 +1,5 @@ final: prev: -let - inherit (final.stdenv) isAarch64 isDarwin; - - darwinSpecific = - if isAarch64 then - { inherit (final.darwin.apple_sdk_11_0.frameworks) Accelerate MetalKit; } - else - { inherit (final.darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo; }; - - osSpecific = if isDarwin then darwinSpecific else { }; -in - { - llama-cpp = final.callPackage ./package.nix osSpecific; + llama-cpp = final.callPackage ./package.nix { }; } diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index bd2dbf4b2c4bd..e286fda191b66 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -11,14 +11,18 @@ mpi, openblas, # TODO: Use the generic `blas` so users could switch betwen alternative implementations cudaPackages, + darwin, rocmPackages, clblast, - Accelerate ? null, - MetalKit ? null, - CoreVideo ? null, - CoreGraphics ? null, - useOpenCL ? false, + useBlas ? builtins.all (x: !x) [ + useCuda + useMetalKit + useOpenCL + useRocm + ], useCuda ? config.cudaSupport, + useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, + useOpenCL ? false, useRocm ? config.rocmSupport, }@inputs: @@ -29,7 +33,6 @@ let optionals versionOlder ; - isDefault = !useOpenCL && !useCuda && !useRocm; # It's necessary to consistently use backendStdenv when building with CUDA support, # otherwise we get libstdc++ errors downstream. @@ -44,7 +47,7 @@ let " (CUDA accelerated)" else if useRocm then " (ROCm accelerated)" - else if (MetalKit != null) then + else if useMetalKit then " (MetalKit accelerated)" else ""; @@ -70,13 +73,16 @@ let ] ); - # See ./overlay.nix for where these dependencies are passed in. - defaultBuildInputs = builtins.filter (p: p != null) [ - Accelerate - MetalKit - CoreVideo - CoreGraphics - ]; + # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64 + # separately + darwinBuildInputs = + with darwin.apple_sdk.frameworks; + [ Accelerate ] + ++ optionals useMetalKit [ MetalKit ] + ++ optionals (!useMetalKit) [ + CoreVideo + CoreGraphics + ]; cudaBuildInputs = with cudaPackages; [ cuda_cccl.dev # @@ -121,7 +127,7 @@ effectiveStdenv.mkDerivation (finalAttrs: { ++ optionals useOpenCL [ clblast ] ++ optionals useCuda cudaBuildInputs ++ optionals useRocm rocmBuildInputs - ++ optionals isDefault defaultBuildInputs; + ++ optionals effectiveStdenv.isDarwin darwinBuildInputs; cmakeFlags = [ @@ -129,6 +135,8 @@ effectiveStdenv.mkDerivation (finalAttrs: { (cmakeBool "LLAMA_BUILD_SERVER" true) (cmakeBool "BUILD_SHARED_LIBS" true) (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + (cmakeBool "LLAMA_METAL" useMetalKit) + (cmakeBool "LLAMA_BLAS" useBlas) ] ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] @@ -143,18 +151,8 @@ effectiveStdenv.mkDerivation (finalAttrs: { # Should likely use `rocmPackages.clr.gpuTargets`. 
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" ] - ++ optionals isDefault ( - if (MetalKit != null) then - [ - "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" - "-DLLAMA_METAL=ON" - ] - else - [ - "-DLLAMA_BLAS=ON" - "-DLLAMA_BLAS_VENDOR=OpenBLAS" - ] - ); + ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ] + ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ]; # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, # if they haven't been added yet. From 02599417918eaa5a9ee98d7408f372dbaf25ce2f Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Sun, 24 Dec 2023 19:36:30 +0000 Subject: [PATCH 05/23] nix: pass most darwin frameworks unconditionally ...for simplicity --- .devops/nix/package.nix | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index e286fda191b66..1d401a9ee4ce9 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -77,12 +77,12 @@ let # separately darwinBuildInputs = with darwin.apple_sdk.frameworks; - [ Accelerate ] - ++ optionals useMetalKit [ MetalKit ] - ++ optionals (!useMetalKit) [ + [ + Accelerate CoreVideo CoreGraphics - ]; + ] + ++ optionals useMetalKit [ MetalKit ]; cudaBuildInputs = with cudaPackages; [ cuda_cccl.dev # From 0fa62c1ab93500e474778a3f2d98f30991d63837 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 16:23:56 +0000 Subject: [PATCH 06/23] *.nix: nixfmt nix shell github:piegamesde/nixfmt/rfc101-style --command \ nixfmt flake.nix .devops/nix/*.nix --- .devops/nix/package.nix | 162 ++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 80 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 1d401a9ee4ce9..5b88cf079f605 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -97,86 +97,88 @@ let ]; in -effectiveStdenv.mkDerivation (finalAttrs: { - name = "llama.cpp"; - src = ../../.; - meta = { - description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - mainProgram = "llama"; - }; - - postPatch = '' - substituteInPlace ./ggml-metal.m \ - --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - - # TODO: Package up each Python script or service appropriately. 
- # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, - # we could make those *.py into setuptools' entrypoints - substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" - ''; - - nativeBuildInputs = [ - cmake - ninja - pkg-config - git - ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ]; - - buildInputs = - [ mpi ] - ++ optionals useOpenCL [ clblast ] - ++ optionals useCuda cudaBuildInputs - ++ optionals useRocm rocmBuildInputs - ++ optionals effectiveStdenv.isDarwin darwinBuildInputs; - - cmakeFlags = - [ - (cmakeBool "LLAMA_NATIVE" true) - (cmakeBool "LLAMA_BUILD_SERVER" true) - (cmakeBool "BUILD_SHARED_LIBS" true) - (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) - (cmakeBool "LLAMA_METAL" useMetalKit) - (cmakeBool "LLAMA_BLAS" useBlas) - ] - ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] - ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] - ++ optionals useRocm [ - (cmakeBool "LLAMA_HIPBLAS" true) - (cmakeFeature "CMAKE_C_COMPILER" "hipcc") - (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") - - # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM - # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt - # and select the line that matches the current nixpkgs version of rocBLAS. - # Should likely use `rocmPackages.clr.gpuTargets`. - "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" - ] - ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ] - ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ]; - - # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, - # if they haven't been added yet. - postInstall = '' - mv $out/bin/main $out/bin/llama - mv $out/bin/server $out/bin/llama-server - mkdir -p $out/include - cp $src/llama.h $out/include/ - ''; - - # Define the shells here, but don't add in the inputsFrom to avoid recursion. - passthru = { - shell = mkShell { - name = "default${descriptionSuffix}"; - description = "contains numpy and sentencepiece"; - buildInputs = [ llama-python ]; - inputsFrom = [ finalAttrs.finalPackage ]; +effectiveStdenv.mkDerivation ( + finalAttrs: { + name = "llama.cpp"; + src = ../../.; + meta = { + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + mainProgram = "llama"; }; - shell-extra = mkShell { - name = "extra${descriptionSuffix}"; - description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; - buildInputs = [ llama-python-extra ]; - inputsFrom = [ finalAttrs.finalPackage ]; + postPatch = '' + substituteInPlace ./ggml-metal.m \ + --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" + + # TODO: Package up each Python script or service appropriately. 
+ # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, + # we could make those *.py into setuptools' entrypoints + substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" + ''; + + nativeBuildInputs = [ + cmake + ninja + pkg-config + git + ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ]; + + buildInputs = + [ mpi ] + ++ optionals useOpenCL [ clblast ] + ++ optionals useCuda cudaBuildInputs + ++ optionals useRocm rocmBuildInputs + ++ optionals effectiveStdenv.isDarwin darwinBuildInputs; + + cmakeFlags = + [ + (cmakeBool "LLAMA_NATIVE" true) + (cmakeBool "LLAMA_BUILD_SERVER" true) + (cmakeBool "BUILD_SHARED_LIBS" true) + (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + (cmakeBool "LLAMA_METAL" useMetalKit) + (cmakeBool "LLAMA_BLAS" useBlas) + ] + ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] + ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] + ++ optionals useRocm [ + (cmakeBool "LLAMA_HIPBLAS" true) + (cmakeFeature "CMAKE_C_COMPILER" "hipcc") + (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + + # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM + # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt + # and select the line that matches the current nixpkgs version of rocBLAS. + # Should likely use `rocmPackages.clr.gpuTargets`. + "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" + ] + ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ] + ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ]; + + # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, + # if they haven't been added yet. + postInstall = '' + mv $out/bin/main $out/bin/llama + mv $out/bin/server $out/bin/llama-server + mkdir -p $out/include + cp $src/llama.h $out/include/ + ''; + + # Define the shells here, but don't add in the inputsFrom to avoid recursion. + passthru = { + shell = mkShell { + name = "default${descriptionSuffix}"; + description = "contains numpy and sentencepiece"; + buildInputs = [ llama-python ]; + inputsFrom = [ finalAttrs.finalPackage ]; + }; + + shell-extra = mkShell { + name = "extra${descriptionSuffix}"; + description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; + buildInputs = [ llama-python-extra ]; + inputsFrom = [ finalAttrs.finalPackage ]; + }; }; - }; -}) + } +) From 69c56bc0f43d76495c910bcae0858f010ec63256 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:29:02 +0000 Subject: [PATCH 07/23] flake.nix: add maintainers --- .devops/nix/package.nix | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 5b88cf079f605..12b8f66451f47 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -104,6 +104,16 @@ effectiveStdenv.mkDerivation ( meta = { description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; mainProgram = "llama"; + + + # These people might respond if you ping them in case of Nix-specific + # regressions or for reviewing Nix-specific PRs. + + # Note that lib.maintainers is defined in Nixpkgs. 
+ maintainers = with lib.maintainers; [ + philiptaron + SomeoneSerge + ]; }; postPatch = '' From a07407c98defd7a00fdac62338c90ebb0d90b61a Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:29:50 +0000 Subject: [PATCH 08/23] nix: move meta down to follow Nixpkgs style more closely --- .devops/nix/package.nix | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 12b8f66451f47..471c46b2a7235 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -101,20 +101,6 @@ effectiveStdenv.mkDerivation ( finalAttrs: { name = "llama.cpp"; src = ../../.; - meta = { - description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - mainProgram = "llama"; - - - # These people might respond if you ping them in case of Nix-specific - # regressions or for reviewing Nix-specific PRs. - - # Note that lib.maintainers is defined in Nixpkgs. - maintainers = with lib.maintainers; [ - philiptaron - SomeoneSerge - ]; - }; postPatch = '' substituteInPlace ./ggml-metal.m \ @@ -190,5 +176,20 @@ effectiveStdenv.mkDerivation ( inputsFrom = [ finalAttrs.finalPackage ]; }; }; + + meta = { + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + mainProgram = "llama"; + + + # These people might respond if you ping them in case of Nix-specific + # regressions or for reviewing Nix-specific PRs. + + # Note that lib.maintainers is defined in Nixpkgs. + maintainers = with lib.maintainers; [ + philiptaron + SomeoneSerge + ]; + }; } ) From 04bc417466d72c302831e90bc5c13257890b3a31 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:32:44 +0000 Subject: [PATCH 09/23] nix: add missing meta attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nix: clarify the interpretation of meta.maintainers nix: clarify the meaning of "broken" and "badPlatforms" nix: passthru: expose the use* flags for inspection E.g.: ``` ❯ nix eval .#cuda.useCuda true ``` --- .devops/nix/package.nix | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 471c46b2a7235..c6d03b4a480e7 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -162,6 +162,14 @@ effectiveStdenv.mkDerivation ( # Define the shells here, but don't add in the inputsFrom to avoid recursion. passthru = { + inherit + useBlas + useCuda + useMetalKit + useOpenCL + useRocm + ; + shell = mkShell { name = "default${descriptionSuffix}"; description = "contains numpy and sentencepiece"; @@ -178,18 +186,39 @@ effectiveStdenv.mkDerivation ( }; meta = { + # Configurations we don't want even the CI to evaluate. Results in the + # "unsupported platform" messages. This is mostly a no-op, because + # cudaPackages would've refused to evaluate anyway. + badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin; + + # Configurations that are known to result in build failures. Can be + # overridden by importing Nixpkgs with `allowBroken = true`. 
+ broken = (useMetalKit && !effectiveStdenv.isDarwin); + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - mainProgram = "llama"; + homepage = "https://github.com/ggerganov/llama.cpp/"; + license = lib.licenses.mit; + # Accommodates `nix run` and `lib.getExe` + mainProgram = "llama"; - # These people might respond if you ping them in case of Nix-specific - # regressions or for reviewing Nix-specific PRs. + # These people might respond, on the best effort basis, if you ping them + # in case of Nix-specific regressions or for reviewing Nix-specific PRs. + # Consider adding yourself to this list if you want to ensure this flake + # stays maintained and you're willing to invest your time. Do not add + # other people without their consent. Consider removing people after + # they've been unreachable for long periods of time. - # Note that lib.maintainers is defined in Nixpkgs. + # Note that lib.maintainers is defined in Nixpkgs, but you may just add + # an attrset following the same format as in + # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix maintainers = with lib.maintainers; [ - philiptaron - SomeoneSerge + philiptaron + SomeoneSerge ]; + + # Extend `badPlatforms` instead + platforms = lib.platforms.all; }; } ) From d08690af65af266414512d308d88fa1715f75821 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 15:27:58 +0000 Subject: [PATCH 10/23] flake.nix: avoid re-evaluating nixpkgs too many times --- .devops/nix/overlay.nix | 5 ---- .devops/nix/scope.nix | 3 +++ flake.nix | 52 +++++++++++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 22 deletions(-) delete mode 100644 .devops/nix/overlay.nix create mode 100644 .devops/nix/scope.nix diff --git a/.devops/nix/overlay.nix b/.devops/nix/overlay.nix deleted file mode 100644 index c7baec8434fa4..0000000000000 --- a/.devops/nix/overlay.nix +++ /dev/null @@ -1,5 +0,0 @@ -final: prev: - -{ - llama-cpp = final.callPackage ./package.nix { }; -} diff --git a/.devops/nix/scope.nix b/.devops/nix/scope.nix new file mode 100644 index 0000000000000..78e6a126d3b00 --- /dev/null +++ b/.devops/nix/scope.nix @@ -0,0 +1,3 @@ +{ lib, newScope }: + +lib.makeScope newScope (self: { llama-cpp = self.callPackage ./package.nix { }; }) diff --git a/flake.nix b/flake.nix index dcf8e1d9defa0..f837f47cf386c 100644 --- a/flake.nix +++ b/flake.nix @@ -17,23 +17,42 @@ in { - # These define the various ways to build the llama.cpp project. - # Integrate them into your flake.nix configuration by adding this overlay to nixpkgs.overlays. - overlays.default = import ./.devops/nix/overlay.nix; + # An overlay can be used to have a more granular control over llama-cpp's + # dependencies and configuration, than that offered by the `.override` + # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays. + # + # E.g. in a flake: + # ``` + # { nixpkgs, llama-cpp, ... }: + # let pkgs = import nixpkgs { + # overlays = [ (llama-cpp.overlays.default) ]; + # system = "aarch64-linux"; + # config.allowUnfree = true; + # config.cudaSupport = true; + # config.cudaCapabilities = [ "7.2" ]; + # config.cudaEnableForwardCompat = false; + # }; in { + # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; + # } + # ``` + # + # Cf. 
https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format + overlays.default = (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); # These use the package definition from `./.devops/nix/package.nix`. # There's one per backend that llama-cpp uses. Add more as needed! packages = eachSystem ( system: let - defaultConfig = { + # Avoid re-evaluation for the nixpkgs instance, + # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs + pkgs = nixpkgs.legacyPackages.${system}; + + # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, + # and ucx are built with CUDA support) + pkgsCuda = import nixpkgs { inherit system; - overlays = [ self.overlays.default ]; - }; - pkgs = import nixpkgs defaultConfig; - # Let's not make a big deal about getting the CUDA bits. - cudaConfig = defaultConfig // { config.cudaSupport = true; config.allowUnfreePredicate = p: @@ -48,19 +67,18 @@ ) (p.meta.licenses or [ p.meta.license ]); }; - pkgsCuda = import nixpkgs cudaConfig; - # Let's make sure to turn on ROCm support across the whole package ecosystem. - rocmConfig = defaultConfig // { + # Ensure dependencies use ROCm consistently + pkgsRocm = import nixpkgs { + inherit system; config.rocmSupport = true; }; - pkgsRocm = import nixpkgs rocmConfig; in { - default = pkgs.llama-cpp; - opencl = pkgs.llama-cpp.override { useOpenCL = true; }; - cuda = pkgsCuda.llama-cpp; - rocm = pkgsRocm.llama-cpp; + default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; + opencl = self.packages.${system}.default.override { useOpenCL = true; }; + cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; + rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; } ); From a28c9acca3a474a15f2408a774883668bffbc5ae Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 16:18:52 +0000 Subject: [PATCH 11/23] flake.nix: use flake-parts --- .devops/nix/apps.nix | 34 ++++--- .devops/nix/devshells.nix | 21 ++-- .devops/nix/nixpkgs-instances.nix | 35 +++++++ flake.lock | 37 +++++++ flake.nix | 164 ++++++++++++------------------ 5 files changed, 172 insertions(+), 119 deletions(-) create mode 100644 .devops/nix/nixpkgs-instances.nix diff --git a/.devops/nix/apps.nix b/.devops/nix/apps.nix index d9b6a1e000628..b8a12cc0a0463 100644 --- a/.devops/nix/apps.nix +++ b/.devops/nix/apps.nix @@ -1,14 +1,22 @@ -{ package, binaries }: - -let - default = builtins.elemAt binaries 0; - mkApp = name: { - ${name} = { - type = "app"; - program = "${package}/bin/${name}"; +{ + perSystem = + { config, lib, ... }: + { + apps = + let + inherit (config.packages) default; + binaries = [ + "llama" + "llama-embedding" + "llama-server" + "quantize" + "train-text-from-scratch" + ]; + mkApp = name: { + type = "app"; + program = "${default}/bin/${name}"; + }; + in + lib.genAttrs binaries mkApp; }; - }; - result = builtins.foldl' (acc: name: (mkApp name) // acc) { } binaries; -in - -result // { default = result.${default}; } +} diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix index afaaa2644059b..1862f0f085100 100644 --- a/.devops/nix/devshells.nix +++ b/.devops/nix/devshells.nix @@ -1,8 +1,13 @@ -{ concatMapAttrs, packages }: - -concatMapAttrs - (name: package: { - ${name} = package.passthru.shell; - ${name + "-extra"} = package.passthru.shell-extra; - }) - packages +{ + perSystem = + { config, lib, ... 
}: + { + devShells = + lib.concatMapAttrs + (name: package: { + ${name} = package.passthru.shell; + ${name + "-extra"} = package.passthru.shell-extra; + }) + config.packages; + }; +} diff --git a/.devops/nix/nixpkgs-instances.nix b/.devops/nix/nixpkgs-instances.nix new file mode 100644 index 0000000000000..6e9872b28c8fb --- /dev/null +++ b/.devops/nix/nixpkgs-instances.nix @@ -0,0 +1,35 @@ +{ inputs, ... }: +{ + # The _module.args definitions are passed on to modules as arguments. E.g. + # the module `{ pkgs ... }: { /* config */ }` implicitly uses + # `_module.args.pkgs` (defined in this case by flake-parts). + perSystem = + { system, ... }: + { + _module.args = { + pkgsCuda = import inputs.nixpkgs { + inherit system; + # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, + # and ucx are built with CUDA support) + config.cudaSupport = true; + config.allowUnfreePredicate = + p: + builtins.all + ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) + (p.meta.licenses or [ p.meta.license ]); + }; + # Ensure dependencies use ROCm consistently + pkgsRocm = import inputs.nixpkgs { + inherit system; + config.rocmSupport = true; + }; + }; + }; +} diff --git a/flake.lock b/flake.lock index 656792f21cbf9..3fcd1f45d5a41 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,23 @@ { "nodes": { + "flake-parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1701473968, + "narHash": "sha256-YcVE5emp1qQ8ieHUnxt1wCZCC3ZfAS+SRRWZ2TMda7E=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "34fed993f1674c8d06d58b37ce1e0fe5eebcb9f5", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, "nixpkgs": { "locked": { "lastModified": 1703559957, @@ -16,8 +34,27 @@ "type": "github" } }, + "nixpkgs-lib": { + "locked": { + "dir": "lib", + "lastModified": 1701253981, + "narHash": "sha256-ztaDIyZ7HrTAfEEUt9AtTDNoCYxUdSd6NrRHaYOIxtk=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "e92039b55bcd58469325ded85d4f58dd5a4eaf58", + "type": "github" + }, + "original": { + "dir": "lib", + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, "root": { "inputs": { + "flake-parts": "flake-parts", "nixpkgs": "nixpkgs" } } diff --git a/flake.nix b/flake.nix index f837f47cf386c..ff610ec64d53b 100644 --- a/flake.nix +++ b/flake.nix @@ -1,111 +1,79 @@ { + description = "Port of Facebook's LLaMA model in C/C++"; + inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-parts.url = "github:hercules-ci/flake-parts"; }; + # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl: + # + # ```bash + # ❯ nix repl + # nix-repl> :lf github:ggerganov/llama.cpp + # Added 13 variables. + # nix-repl> outputs.apps.x86_64-linux.quantize + # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; } + # ``` outputs = - { self, nixpkgs }: - - let - systems = [ - "aarch64-darwin" - "aarch64-linux" - "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) - "x86_64-linux" - ]; - eachSystem = f: nixpkgs.lib.genAttrs systems (system: f system); - in + { flake-parts, ... }@inputs: + flake-parts.lib.mkFlake { inherit inputs; } - { - # An overlay can be used to have a more granular control over llama-cpp's - # dependencies and configuration, than that offered by the `.override` - # mechanism. Cf. 
https://nixos.org/manual/nixpkgs/stable/#chap-overlays. - # - # E.g. in a flake: - # ``` - # { nixpkgs, llama-cpp, ... }: - # let pkgs = import nixpkgs { - # overlays = [ (llama-cpp.overlays.default) ]; - # system = "aarch64-linux"; - # config.allowUnfree = true; - # config.cudaSupport = true; - # config.cudaCapabilities = [ "7.2" ]; - # config.cudaEnableForwardCompat = false; - # }; in { - # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; - # } - # ``` - # - # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format - overlays.default = (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); + { - # These use the package definition from `./.devops/nix/package.nix`. - # There's one per backend that llama-cpp uses. Add more as needed! - packages = eachSystem ( - system: - let - # Avoid re-evaluation for the nixpkgs instance, - # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs - pkgs = nixpkgs.legacyPackages.${system}; + imports = [ + .devops/nix/nixpkgs-instances.nix + .devops/nix/apps.nix + .devops/nix/devshells.nix + ]; - # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, - # and ucx are built with CUDA support) - pkgsCuda = import nixpkgs { - inherit system; + # An overlay can be used to have a more granular control over llama-cpp's + # dependencies and configuration, than that offered by the `.override` + # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays. + # + # E.g. in a flake: + # ``` + # { nixpkgs, llama-cpp, ... }: + # let pkgs = import nixpkgs { + # overlays = [ (llama-cpp.overlays.default) ]; + # system = "aarch64-linux"; + # config.allowUnfree = true; + # config.cudaSupport = true; + # config.cudaCapabilities = [ "7.2" ]; + # config.cudaEnableForwardCompat = false; + # }; in { + # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; + # } + # ``` + # + # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format + flake.overlays.default = + (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); - config.cudaSupport = true; - config.allowUnfreePredicate = - p: - builtins.all - ( - license: - license.free - || builtins.elem license.shortName [ - "CUDA EULA" - "cuDNN EULA" - ] - ) - (p.meta.licenses or [ p.meta.license ]); - }; + systems = [ + "aarch64-darwin" + "aarch64-linux" + "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) + "x86_64-linux" + ]; - # Ensure dependencies use ROCm consistently - pkgsRocm = import nixpkgs { - inherit system; - config.rocmSupport = true; + perSystem = + { + config, + pkgs, + pkgsCuda, + pkgsRocm, + ... + }: + { + # We don't use the overlay here so as to avoid making too many instances of nixpkgs, + # cf. 
https://zimbatm.com/notes/1000-instances-of-nixpkgs + packages = { + default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; + opencl = config.packages.default.override { useOpenCL = true; }; + cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; + rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; + }; }; - in - { - default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; - opencl = self.packages.${system}.default.override { useOpenCL = true; }; - cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; - rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; - } - ); - - # These use the definition of llama-cpp from `./.devops/nix/package.nix` - # and expose various binaries as apps with `nix run .#app-name`. - # Note that none of these apps use anything other than the default backend. - apps = eachSystem ( - system: - import ./.devops/nix/apps.nix { - package = self.packages.${system}.default; - binaries = [ - "llama" - "llama-embedding" - "llama-server" - "quantize" - "train-text-from-scratch" - ]; - } - ); - - # These expose a build environment for either a "default" or an "extra" set of dependencies. - devShells = eachSystem ( - system: - import ./.devops/nix/devshells.nix { - concatMapAttrs = nixpkgs.lib.concatMapAttrs; - packages = self.packages.${system}; - } - ); - }; + }; } From a629371245a292f047c2bca9d2c8e9034338130a Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:02:36 +0000 Subject: [PATCH 12/23] nix: migrate to pname+version --- .devops/nix/package.nix | 33 +++++++++++++++++++-------------- .devops/nix/scope.nix | 13 +++++++++++-- flake.nix | 15 ++++++++++----- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index c6d03b4a480e7..8fe250651fdd3 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -24,6 +24,7 @@ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, useOpenCL ? false, useRocm ? config.rocmSupport, + llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake }@inputs: let @@ -31,6 +32,7 @@ let cmakeBool cmakeFeature optionals + strings versionOlder ; @@ -39,18 +41,19 @@ let stdenv = throw "Use effectiveStdenv instead"; effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv; - # Give a little description difference between the flavors. + suffices = + lib.optionals useOpenCL [ "OpenCL" ] + ++ lib.optionals useCuda [ "CUDA" ] + ++ lib.optionals useRocm [ "ROCm" ] + ++ lib.optionals useMetalKit [ "MetalKit" ] + ++ lib.optionals useBlas [ "BLAS" ]; + + pnameSuffix = + strings.optionalString (suffices != [ ]) + "-${strings.concatMapStringsSep "-" strings.toLower suffices}"; descriptionSuffix = - if useOpenCL then - " (OpenCL accelerated)" - else if useCuda then - " (CUDA accelerated)" - else if useRocm then - " (ROCm accelerated)" - else if useMetalKit then - " (MetalKit accelerated)" - else - ""; + strings.optionalString (suffices != [ ]) + ", accelerated with ${strings.concatStringsSep ", " suffices}"; # TODO: package the Python in this repository in a Nix-like way. 
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo @@ -99,7 +102,9 @@ in effectiveStdenv.mkDerivation ( finalAttrs: { - name = "llama.cpp"; + pname = "llama-cpp${pnameSuffix}"; + version = llamaVersion; + src = ../../.; postPatch = '' @@ -171,14 +176,14 @@ effectiveStdenv.mkDerivation ( ; shell = mkShell { - name = "default${descriptionSuffix}"; + name = "shell-${finalAttrs.finalPackage.name}"; description = "contains numpy and sentencepiece"; buildInputs = [ llama-python ]; inputsFrom = [ finalAttrs.finalPackage ]; }; shell-extra = mkShell { - name = "extra${descriptionSuffix}"; + name = "shell-extra-${finalAttrs.finalPackage.name}"; description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; buildInputs = [ llama-python-extra ]; inputsFrom = [ finalAttrs.finalPackage ]; diff --git a/.devops/nix/scope.nix b/.devops/nix/scope.nix index 78e6a126d3b00..7932ac1e8a910 100644 --- a/.devops/nix/scope.nix +++ b/.devops/nix/scope.nix @@ -1,3 +1,12 @@ -{ lib, newScope }: +{ + lib, + newScope, + llamaVersion ? "0.0.0", +}: -lib.makeScope newScope (self: { llama-cpp = self.callPackage ./package.nix { }; }) +lib.makeScope newScope ( + self: { + inherit llamaVersion; + llama-cpp = self.callPackage ./package.nix { }; + } +) diff --git a/flake.nix b/flake.nix index ff610ec64d53b..a7c2b58f5d0da 100644 --- a/flake.nix +++ b/flake.nix @@ -16,7 +16,10 @@ # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; } # ``` outputs = - { flake-parts, ... }@inputs: + { self, flake-parts, ... }@inputs: + let + llamaVersion = self.dirtyShortRev or self.shortRev; + in flake-parts.lib.mkFlake { inherit inputs; } { @@ -48,7 +51,9 @@ # # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format flake.overlays.default = - (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); + (final: prev: { + llamaPackages = final.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; + }); systems = [ "aarch64-darwin" @@ -69,10 +74,10 @@ # We don't use the overlay here so as to avoid making too many instances of nixpkgs, # cf. 
https://zimbatm.com/notes/1000-instances-of-nixpkgs packages = { - default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; + default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; opencl = config.packages.default.override { useOpenCL = true; }; - cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; - rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; + cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; + rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; }; }; }; From e3b1ba27c21deec131d8082e213bd54372f799ee Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:03:19 +0000 Subject: [PATCH 13/23] flake.nix: overlay: expose both the namespace and the default attribute --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index a7c2b58f5d0da..7c7440fc92970 100644 --- a/flake.nix +++ b/flake.nix @@ -53,6 +53,7 @@ flake.overlays.default = (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; + inherit (final.llamaPackages) llama-cpp; }); systems = [ From 12d4a68efedbbd88df3a5fd9889e554059f9b860 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Mon, 25 Dec 2023 17:05:21 +0000 Subject: [PATCH 14/23] ci: add the (Nix) flakestry workflow --- .github/workflows/nix-flakestry.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/nix-flakestry.yml diff --git a/.github/workflows/nix-flakestry.yml b/.github/workflows/nix-flakestry.yml new file mode 100644 index 0000000000000..3abfb3509a648 --- /dev/null +++ b/.github/workflows/nix-flakestry.yml @@ -0,0 +1,23 @@ +# Make the flake discoverable on https://flakestry.dev +name: "Publish a flake to flakestry" +on: + push: + tags: + - "v?[0-9]+.[0-9]+.[0-9]+" + - "v?[0-9]+.[0-9]+" + workflow_dispatch: + inputs: + tag: + description: "The existing tag to publish" + type: "string" + required: true +jobs: + publish-flake: + runs-on: ubuntu-latest + permissions: + id-token: "write" + contents: "read" + steps: + - uses: flakestry/flakestry-publish@main + with: + version: "${{ inputs.tag || github.ref_name }}" From a16f58997c0e70eb97139af55444264aa83f4f78 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 03:58:02 +0000 Subject: [PATCH 15/23] nix: cmakeFlags: explicit OFF bools --- .devops/nix/package.nix | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 8fe250651fdd3..8d07508a1e538 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -137,13 +137,13 @@ effectiveStdenv.mkDerivation ( (cmakeBool "LLAMA_BUILD_SERVER" true) (cmakeBool "BUILD_SHARED_LIBS" true) (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) - (cmakeBool "LLAMA_METAL" useMetalKit) (cmakeBool "LLAMA_BLAS" useBlas) + (cmakeBool "LLAMA_CLBLAST" useOpenCL) + (cmakeBool "LLAMA_CUBLAS" useCuda) + (cmakeBool "LLAMA_HIPBLAS" useRocm) + (cmakeBool "LLAMA_METAL" useMetalKit) ] - ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ] - ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ] ++ optionals useRocm [ - (cmakeBool "LLAMA_HIPBLAS" true) (cmakeFeature "CMAKE_C_COMPILER" "hipcc") (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") From dd0e12c7410ac7e163c6d9b0dbcb9fe8ce3070c9 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 04:02:11 +0000 Subject: [PATCH 16/23] nix: cuda: reduce runtime closure 
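Referencing the split outputs (`.dev`, `.lib`, `.static`) of cuda_cudart and
libcublas directly keeps the default, lndir-symlinked outputs -- and
everything they retain -- out of the runtime closure.

A minimal sketch of the same multiple-outputs idea, using openssl as a
freely-evaluable stand-in for the CUDA packages (the file name and command
are illustrative assumptions):

```nix
# sketch.nix -- evaluate with e.g. `nix-instantiate --eval -A runtimeDep sketch.nix`
let
  pkgs = import <nixpkgs> { };
in
{
  # Depending on `.out` (shared libraries) instead of the `.dev` output
  # (headers, pkg-config files) is what keeps a runtime closure small;
  # both attributes evaluate to distinct store paths.
  runtimeDep = pkgs.openssl.out.outPath;
  buildTimeDep = pkgs.openssl.dev.outPath;
}
```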
--- .devops/nix/package.nix | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 8d07508a1e538..0e10ba61e72ec 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -89,8 +89,15 @@ let cudaBuildInputs = with cudaPackages; [ cuda_cccl.dev # - cuda_cudart - libcublas + + # A temporary hack for reducing the closure size, remove once cudaPackages + # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792 + cuda_cudart.dev + cuda_cudart.lib + cuda_cudart.static + libcublas.dev + libcublas.lib + libcublas.static ]; rocmBuildInputs = with rocmPackages; [ From 4522c47a2282a595344ba8dcb85222910b2ffc4f Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 04:05:51 +0000 Subject: [PATCH 17/23] nix: fewer rebuilds --- .devops/nix/package.nix | 2 +- flake.nix | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 0e10ba61e72ec..3222ec4695622 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -112,7 +112,7 @@ effectiveStdenv.mkDerivation ( pname = "llama-cpp${pnameSuffix}"; version = llamaVersion; - src = ../../.; + src = lib.cleanSource ../../.; postPatch = '' substituteInPlace ./ggml-metal.m \ diff --git a/flake.nix b/flake.nix index 7c7440fc92970..3575cbf12607a 100644 --- a/flake.nix +++ b/flake.nix @@ -18,7 +18,13 @@ outputs = { self, flake-parts, ... }@inputs: let - llamaVersion = self.dirtyShortRev or self.shortRev; + # We could include the git revisions in the package names but those would + # needlessly trigger rebuilds: + # llamaVersion = self.dirtyShortRev or self.shortRev; + + # Nix already uses cryptographic hashes for versioning, so we'll just fix + # the fake semver for now: + llamaVersion = "0.0.0"; in flake-parts.lib.mkFlake { inherit inputs; } From ae6bebccb11b17ebeca2809e00e204f45644c76d Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 04:33:24 +0000 Subject: [PATCH 18/23] nix: respect config.cudaCapabilities --- .devops/nix/package.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 3222ec4695622..cb1e8f48045c8 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -150,6 +150,14 @@ effectiveStdenv.mkDerivation ( (cmakeBool "LLAMA_HIPBLAS" useRocm) (cmakeBool "LLAMA_METAL" useMetalKit) ] + ++ optionals useCuda [ + ( + with cudaPackages.flags; + cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( + builtins.concatStringsSep ";" (map dropDot cudaCapabilities) + ) + ) + ] ++ optionals useRocm [ (cmakeFeature "CMAKE_C_COMPILER" "hipcc") (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") From 1efbc6b0643751480a879d94600b589b95e82e21 Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 17:26:22 +0000 Subject: [PATCH 19/23] nix: add the impure driver's location to the DT_RUNPATHs --- .devops/nix/package.nix | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index cb1e8f48045c8..c54a7c3c63e95 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -124,12 +124,20 @@ effectiveStdenv.mkDerivation ( substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" ''; - nativeBuildInputs = [ - cmake - ninja - pkg-config - git - ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ]; + nativeBuildInputs = + [ + cmake + ninja + pkg-config + git + ] + ++ optionals 
---
 .devops/nix/package.nix | 20 ++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index cb1e8f48045c8..c54a7c3c63e95 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -124,12 +124,20 @@ effectiveStdenv.mkDerivation (
       substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
     '';
 
-    nativeBuildInputs = [
-      cmake
-      ninja
-      pkg-config
-      git
-    ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ];
+    nativeBuildInputs =
+      [
+        cmake
+        ninja
+        pkg-config
+        git
+      ]
+      ++ optionals useCuda [
+        cudaPackages.cuda_nvcc
+
+        # TODO: Replace with autoAddDriverRunpath
+        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
+        cudaPackages.autoAddOpenGLRunpathHook
+      ];
 
     buildInputs =
       [ mpi ]

From 82e48e256725f4865dd92a307786b4713e551af7 Mon Sep 17 00:00:00 2001
From: Someone Serge
Date: Tue, 26 Dec 2023 22:20:07 +0000
Subject: [PATCH 20/23] nix: clean sources more thoroughly

...this way outPaths change less frequently, and so there are fewer rebuilds
---
 .devops/nix/package.nix | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index c54a7c3c63e95..2d00994577263 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -112,7 +112,16 @@ effectiveStdenv.mkDerivation (
     pname = "llama-cpp${pnameSuffix}";
     version = llamaVersion;
 
-    src = lib.cleanSource ../../.;
+    src = lib.cleanSourceWith {
+      filter =
+        name: type:
+        !(builtins.any (_: _) [
+          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+          (baseNameOf name == "README.md") # Ignore README.md when computing outPaths
+          (lib.hasPrefix "." (baseNameOf name)) # Skip hidden files and directories
+        ]);
+      src = lib.cleanSource ../../.;
+    };
 
     postPatch = ''
       substituteInPlace ./ggml-metal.m \

From 7bd8d8c6d7de67975fbb9681990d4d4af5b6bbab Mon Sep 17 00:00:00 2001
From: Someone Serge
Date: Tue, 26 Dec 2023 22:23:30 +0000
Subject: [PATCH 21/23] nix: explicit mpi support
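
OpenMPI adds roughly 700M to the runtime closure, so MPI support becomes
an explicit, off-by-default flag rather than an unconditional dependency.
The flake exposes ready-made variants (e.g. `nix build .#mpi-cpu`), and
consumers of the overlay can opt in through the usual override mechanism;
a minimal sketch, assuming the overlay from this flake is already applied:

    # In a downstream overlay: an MPI-enabled build of the CPU backend.
    final: prev: {
      llama-cpp-mpi = final.llamaPackages.llama-cpp.override { useMpi = true; };
    }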
"0.0.0", # Arbitrary version, substituted by the flake @@ -42,11 +43,12 @@ let effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv; suffices = - lib.optionals useOpenCL [ "OpenCL" ] + lib.optionals useBlas [ "BLAS" ] ++ lib.optionals useCuda [ "CUDA" ] - ++ lib.optionals useRocm [ "ROCm" ] ++ lib.optionals useMetalKit [ "MetalKit" ] - ++ lib.optionals useBlas [ "BLAS" ]; + ++ lib.optionals useMpi [ "MPI" ] + ++ lib.optionals useOpenCL [ "OpenCL" ] + ++ lib.optionals useRocm [ "ROCm" ]; pnameSuffix = strings.optionalString (suffices != [ ]) @@ -149,11 +151,11 @@ effectiveStdenv.mkDerivation ( ]; buildInputs = - [ mpi ] - ++ optionals useOpenCL [ clblast ] + optionals effectiveStdenv.isDarwin darwinBuildInputs ++ optionals useCuda cudaBuildInputs - ++ optionals useRocm rocmBuildInputs - ++ optionals effectiveStdenv.isDarwin darwinBuildInputs; + ++ optionals useMpi [ mpi ] + ++ optionals useOpenCL [ clblast ] + ++ optionals useRocm rocmBuildInputs; cmakeFlags = [ @@ -166,6 +168,7 @@ effectiveStdenv.mkDerivation ( (cmakeBool "LLAMA_CUBLAS" useCuda) (cmakeBool "LLAMA_HIPBLAS" useRocm) (cmakeBool "LLAMA_METAL" useMetalKit) + (cmakeBool "LLAMA_MPI" useMpi) ] ++ optionals useCuda [ ( @@ -203,6 +206,7 @@ effectiveStdenv.mkDerivation ( useBlas useCuda useMetalKit + useMpi useOpenCL useRocm ; diff --git a/flake.nix b/flake.nix index 3575cbf12607a..d240ececad7bd 100644 --- a/flake.nix +++ b/flake.nix @@ -85,6 +85,9 @@ opencl = config.packages.default.override { useOpenCL = true; }; cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp; + + mpi-cpu = config.packages.default.override { useMpi = true; }; + mpi-cuda = config.packages.default.override { useMpi = true; }; }; }; }; From d0adab60d5d20bf0db25bc026fe5c7790d47f5ef Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Dec 2023 20:04:49 +0000 Subject: [PATCH 22/23] nix: explicit jetson support --- .devops/nix/jetson-support.nix | 32 ++++++++++++++++++++++++++++++++ flake.nix | 1 + 2 files changed, 33 insertions(+) create mode 100644 .devops/nix/jetson-support.nix diff --git a/.devops/nix/jetson-support.nix b/.devops/nix/jetson-support.nix new file mode 100644 index 0000000000000..08426d2abb7ec --- /dev/null +++ b/.devops/nix/jetson-support.nix @@ -0,0 +1,32 @@ +{ inputs, ... }: +{ + perSystem = + { + config, + system, + lib, + pkgsCuda, + ... 
---
 .devops/nix/jetson-support.nix | 32 ++++++++++++++++++++++++++++++++
 flake.nix                      |  1 +
 2 files changed, 33 insertions(+)
 create mode 100644 .devops/nix/jetson-support.nix

diff --git a/.devops/nix/jetson-support.nix b/.devops/nix/jetson-support.nix
new file mode 100644
index 0000000000000..08426d2abb7ec
--- /dev/null
+++ b/.devops/nix/jetson-support.nix
@@ -0,0 +1,32 @@
+{ inputs, ... }:
+{
+  perSystem =
+    {
+      config,
+      system,
+      lib,
+      pkgsCuda,
+      ...
+    }:
+    lib.optionalAttrs (system == "aarch64-linux") {
+      packages =
+        let
+          caps.jetson-xavier = "7.2";
+          caps.jetson-orin = "8.7";
+          caps.jetson-nano = "5.3";
+
+          pkgsFor =
+            cap:
+            import inputs.nixpkgs {
+              inherit system;
+              config = {
+                cudaSupport = true;
+                cudaCapabilities = [ cap ];
+                cudaEnableForwardCompat = false;
+                inherit (pkgsCuda.config) allowUnfreePredicate;
+              };
+            };
+        in
+        builtins.mapAttrs (name: cap: ((pkgsFor cap).callPackage ./scope.nix { }).llama-cpp) caps;
+    };
+}
diff --git a/flake.nix b/flake.nix
index d240ececad7bd..b0a6abd3cd611 100644
--- a/flake.nix
+++ b/flake.nix
@@ -34,6 +34,7 @@
         .devops/nix/nixpkgs-instances.nix
         .devops/nix/apps.nix
         .devops/nix/devshells.nix
+        .devops/nix/jetson-support.nix
       ];
 
       # An overlay can be used to have a more granular control over llama-cpp's

From 3f7003b4bb6e9314ba4f65770dfdcea08a540e7b Mon Sep 17 00:00:00 2001
From: Someone Serge
Date: Tue, 26 Dec 2023 22:41:53 +0000
Subject: [PATCH 23/23] flake.nix: darwin: only expose the default

---
 flake.nix | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/flake.nix b/flake.nix
index b0a6abd3cd611..2209070aa83cd 100644
--- a/flake.nix
+++ b/flake.nix
@@ -73,6 +73,7 @@
       perSystem =
         {
           config,
+          lib,
           pkgs,
           pkgsCuda,
           pkgsRocm,
@@ -81,15 +82,18 @@
         {
           # We don't use the overlay here so as to avoid making too many instances of nixpkgs,
           # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs
-          packages = {
-            default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
-            opencl = config.packages.default.override { useOpenCL = true; };
-            cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
-            rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
-            mpi-cpu = config.packages.default.override { useMpi = true; };
-            mpi-cuda = config.packages.cuda.override { useMpi = true; };
-          };
+          packages =
+            {
+              default = (pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+            }
+            // lib.optionalAttrs pkgs.stdenv.isLinux {
+              opencl = config.packages.default.override { useOpenCL = true; };
+              cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+              rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }).llama-cpp;
+
+              mpi-cpu = config.packages.default.override { useMpi = true; };
+              mpi-cuda = config.packages.cuda.override { useMpi = true; };
+            };
         };
     };
 }