{
  description = "Port of Facebook's LLaMA model in C/C++";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-parts.url = "github:hercules-ci/flake-parts";
  };

  # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl:
  #
  # ```bash
  # ❯ nix repl
  # nix-repl> :lf github:ggerganov/llama.cpp
  # Added 13 variables.
  # nix-repl> outputs.apps.x86_64-linux.quantize
  # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; }
  # ```
  outputs =
    { flake-parts, ... }@inputs:
    flake-parts.lib.mkFlake { inherit inputs; }
      {
        # Flake-parts modules factored out into the repo; they provide the
        # nixpkgs instances (pkgs/pkgsCuda/pkgsRocm module args), the apps,
        # and the devShells outputs respectively.
        imports = [
          .devops/nix/nixpkgs-instances.nix
          .devops/nix/apps.nix
          .devops/nix/devshells.nix
        ];

        # An overlay can be used to have a more granular control over llama-cpp's
        # dependencies and configuration, than that offered by the `.override`
        # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays.
        #
        # E.g. in a flake:
        # ```
        # { nixpkgs, llama-cpp, ... }:
        # let pkgs = import nixpkgs {
        #     overlays = [ (llama-cpp.overlays.default) ];
        #     system = "aarch64-linux";
        #     config.allowUnfree = true;
        #     config.cudaSupport = true;
        #     config.cudaCapabilities = [ "7.2" ];
        #     config.cudaEnableForwardCompat = false;
        # }; in {
        #     packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp;
        # }
        # ```
        #
        # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format
        flake.overlays.default =
          (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; });

        systems = [
          "aarch64-darwin"
          "aarch64-linux"
          "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant)
          "x86_64-linux"
        ];

        perSystem =
          {
            config,
            pkgs,
            pkgsCuda,
            pkgsRocm,
            ...
          }:
          {
            # We don't use the overlay here so as to avoid making too many instances of nixpkgs,
            # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs
            packages = {
              default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp;
              opencl = config.packages.default.override { useOpenCL = true; };
              cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp;
              rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp;
            };
          };
      };
}