diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..653de751 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,98 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "nixos-anywhere-pxe" +description = "Install NixOS with PXE" +dynamic = ["version"] +scripts = { nixos-anywhere-pxe = "nixos_anywhere_pxe:main"} + +[tool.pytest.ini_options] +addopts = "--cov . --cov-report term --cov-fail-under=100 --no-cov-on-fail" + +[tool.mypy] +python_version = "3.12" +warn_redundant_casts = true +disallow_untyped_calls = true +disallow_untyped_defs = true +no_implicit_optional = true + +[[tool.mypy.overrides]] +module = "setuptools.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "pytest.*" +ignore_missing_imports = true + +[tool.ruff] +line-length = 88 + +lint.select = ["ALL"] +lint.ignore = [ + # pydocstyle + "D", + # todo comments + "TD", + # fixmes + "FIX", + # line length + "E501", + "T201", # `print` found + "PLR2004", # Magic value used in comparison + # Too many statements + "PLR0915", + # Too many arguments in function definition + "PLR0913", + "PLR0912", # Too many branches + # $X is too complex + "C901", + # Unused function argument + "ARG001", + + # Dynamically typed expressions (typing.Any) + "ANN401", + # Trailing comma missing + "COM812", + # Unnecessary `dict` call (rewrite as a literal) + "C408", + # Found commented-out code + "ERA001", + # Boolean-typed positional argument in function definition + "FBT001", + # Logging statement uses f-string + "G004", + # disabled on ruff's recommendation as causes problems with the formatter + "ISC001", + # Use of `assert` detected + "S101", + # `subprocess` call: check for execution of untrusted input + "S603", + # Starting a process with a partial executable path + "S607", + # Boolean default positional argument in function definition + "FBT002", +] + +[tool.black] +line-length = 88 +target-version = ['py310'] +include = 
'\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + # The following are specific to Black, you probably don't want those. + | blib2to3 + | tests/data + | profiling +)/ +''' diff --git a/src/default.nix b/src/default.nix index 515e3071..1c86f3ba 100644 --- a/src/default.nix +++ b/src/default.nix @@ -15,6 +15,12 @@ , lib , makeWrapper , mkShellNoCC +, mypy +, pixiecore +, dnsmasq +, python3 +, qemu_kvm +, OVMF }: let runtimeDeps = [ @@ -47,7 +53,15 @@ stdenv.mkDerivation { # Dependencies for our devshell passthru.devShell = mkShellNoCC { - packages = runtimeDeps ++ [ openssh terraform-docs ]; + OVMF = "${OVMF.fd}/FV/OVMF.fd"; + packages = runtimeDeps ++ [ + openssh + terraform-docs + mypy + pixiecore + dnsmasq + qemu_kvm + ]; }; meta = with lib; { diff --git a/src/flake-module.nix b/src/flake-module.nix index 7f4b40fe..da6d9b73 100644 --- a/src/flake-module.nix +++ b/src/flake-module.nix @@ -1,7 +1,8 @@ { perSystem = { config, pkgs, ... }: { - packages = { + packages = rec { nixos-anywhere = pkgs.callPackage ./. 
{ }; + nixos-anywhere-pxe = pkgs.callPackage ./nixos_anywhere_pxe { inherit nixos-anywhere; }; default = config.packages.nixos-anywhere; }; devShells.default = config.packages.nixos-anywhere.devShell; diff --git a/src/nixos_anywhere_pxe/__init__.py b/src/nixos_anywhere_pxe/__init__.py new file mode 100644 index 00000000..ed96c821 --- /dev/null +++ b/src/nixos_anywhere_pxe/__init__.py @@ -0,0 +1,569 @@ +from __future__ import annotations + +import argparse +import binascii +import gzip +import ipaddress +import json +import os +import shlex +import shutil +import subprocess +import sys +import time +from contextlib import ExitStack, contextmanager +from dataclasses import dataclass +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import IO, TYPE_CHECKING, NoReturn + +if TYPE_CHECKING: + from collections.abc import Iterator + +FILE = None | int | IO + +# If we are running from a local checkout, use the local nixos-anywhere.sh otherwise use the installed one. 
NIXOS_ANYWHERE_SH = "nixos-anywhere"
LOCAL_NIXOS_ANYWHERE_SH = Path(__file__).parent.absolute() / "src/nixos-anywhere.sh"
# Prefer the script next to this package only when it is really there;
# otherwise keep the name of the installed executable.
if LOCAL_NIXOS_ANYWHERE_SH.exists():
    NIXOS_ANYWHERE_SH = str(LOCAL_NIXOS_ANYWHERE_SH)


def run(
    cmd: str | list[str],
    input: str | None = None,  # noqa: A002
    stdout: FILE = None,
    stderr: FILE = None,
    extra_env: dict[str, str] | None = None,
    cwd: None | str | Path = None,
    check: bool = True,
) -> subprocess.CompletedProcess[str]:
    """Echo a command to stdout and execute it with ``subprocess.run``.

    Args:
        cmd: A string is executed through the shell; a list is executed
            directly as an argument vector.
        input: Text passed to the child's stdin.
        stdout: Forwarded to ``subprocess.run``.
        stderr: Forwarded to ``subprocess.run``.
        extra_env: Variables layered on top of a copy of ``os.environ``.
        cwd: Working directory for the child.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The completed process; streams are handled in text mode.
    """
    shell = isinstance(cmd, str)
    # A shell string is shown verbatim; quoting it as a single token would
    # misrepresent what the shell is going to execute.
    displayed_cmd = cmd if isinstance(cmd, str) else shlex.join(cmd)
    print(f"$ {displayed_cmd}")
    env = os.environ.copy()
    if extra_env is not None:
        env.update(extra_env)
    return subprocess.run(
        cmd,
        shell=shell,
        input=input,
        stdout=stdout,
        stderr=stderr,
        env=env,
        cwd=cwd,
        check=check,
        text=True,
    )


@dataclass
class DhcpEvent:
    """One lease notification as serialised by the generated dhcp-script."""

    # First script argument ("add", "old" or "del").
    action: str
    mac_address: str
    ip_addr: str
    # Only set when the script received a hostname argument.
    hostname: str | None = None


class Dnsmasq:
    """Handle on a running dnsmasq process and the FIFO carrying its events."""

    def __init__(self, process: subprocess.Popen, dhcp_fifo_path: Path) -> None:
        self.process = process
        self.dhcp_fifo_path = dhcp_fifo_path

    def next_dhcp_event(self) -> Iterator[DhcpEvent]:
        """Yield DHCP events read from the FIFO, forever.

        Each pass opens the FIFO, decodes every line as a JSON object into a
        ``DhcpEvent`` and, once the file is exhausted, sleeps briefly before
        opening it again.
        """
        while True:
            with self.dhcp_fifo_path.open() as f:
                for fifo_line in f:
                    raw_event = json.loads(fifo_line)
                    yield DhcpEvent(**raw_event)
            time.sleep(0.1)


# Whenever a new DHCP lease is created, or an old one destroyed, or a TFTP file
# transfer completes, the executable specified by this option is run.
# must be an absolute pathname, no PATH search occurs.
# The arguments to the process are:
# - "add|old|del"
#   - "add": means a lease has been created
#   - "del": means it has been destroyed
#   - "old": is a notification of an existing lease when dnsmasq starts or a change to
#   MAC address or hostname of an existing lease (also, lease length or expiry
+# - the MAC address of the host (or DUID for IPv6) +# - If the MAC address is from a network type other than ethernet, it will have +# the network type prepended, eg "06-01:23:45:67:89:ab" for token ring. +# - the IP address +# - and the hostname, if known. +# The process is run as root (assuming that dnsmasq was originally run as root) +# even if dnsmasq is configured to change UID to an unprivileged user. +@contextmanager +def start_dnsmasq( + interface: str, + dhcp_range: tuple[ipaddress.IPv4Address, ipaddress.IPv4Address], +) -> Iterator[Dnsmasq]: + with TemporaryDirectory(prefix="dnsmasq.") as _temp: + temp = Path(_temp) + dhcp_script = temp / "dhcp-script" + fifo = temp / "fifo" + dhcp_script.write_text( + f"""#!{sys.executable} +import os +import sys +import json +print(sys.argv) +if len(sys.argv) <= 4: + sys.exit(0) +with open("{fifo}", "w") as f: + data = dict(action=sys.argv[1], mac_address=sys.argv[2], ip_addr=sys.argv[3]) + if len(sys.argv) >= 5: + data["hostname"] = sys.argv[4] + print(json.dumps(data), file=f) +""", + ) + dhcp_script.chmod(0o700) + conf = temp / "dnsmasq.conf" + conf.write_text( + f""" +leasefile-ro +keep-in-foreground +log-facility=- +dhcp-option=3 +dhcp-option=6 +dhcp-range={dhcp_range[0]},{dhcp_range[1]},12h +interface={interface} +port=0 +dhcp-script={dhcp_script} + """, + ) + import time + + time.sleep(1) + os.mkfifo(fifo, 0o600) + env = os.environ.copy() + env["FIFO_PATH"] = str(fifo) + args = ["dnsmasq", "-C", str(conf)] + print(f"spawn {' '.join(args)}") + with subprocess.Popen(args, text=True, env=env) as process: + try: + yield Dnsmasq(process, fifo) + finally: + print("terminate dnsmasq") + process.terminate() + try: + process.wait(timeout=4) + except subprocess.TimeoutExpired: + process.kill() + else: + return + + +@contextmanager +def start_pixiecore( + server_ip: ipaddress.IPv4Address, + port: int, + ssh_public_key: Path, + pxe_image_store_path: Path, + hostname: str, +) -> Iterator[subprocess.Popen]: + with 
@dataclass
class Options:
    """Settings produced by ``parse_args`` for one PXE installation run."""

    flake: str
    netboot_image_flake: str
    skip_firewall: bool
    dhcp_interface: str
    # First host address of the DHCP subnet.
    dhcp_server_ip: ipaddress.IPv4Address
    # Prefix length of the DHCP subnet.
    dhcp_subnet: int
    # (first, last) address handed out to clients.
    dhcp_range: tuple[ipaddress.IPv4Address, ipaddress.IPv4Address]
    pixiecore_http_port: int
    pause_after_completion: bool
    # Arguments the parser did not recognise.
    nixos_anywhere_args: list[str]
    private_key: Path | None = None


@contextmanager
def open_firewall(options: Options) -> Iterator[None]:
    """Open the ports needed for PXE boot for the duration of the ``with`` body.

    Does nothing when ``options.skip_firewall`` is set. Otherwise the first
    tool found among ``nixos-firewall-tool``, ``ufw`` and ``firewall-cmd`` is
    used to open the ports and, on exit, to run that tool's reset/reload
    command. If none is found, a hint is printed to stderr and nothing is
    changed.

    Raises:
        subprocess.CalledProcessError: If opening a port fails.
    """
    if options.skip_firewall:
        yield
        return

    # `<port>/<protocol>` form, matching the other entries and the hint below.
    ports = [
        [p]
        for p in [f"{options.pixiecore_http_port}/tcp", "67/udp", "69/udp", "4011/udp"]
    ]
    if shutil.which("nixos-firewall-tool") is not None:
        command_prefix = ["nixos-firewall-tool", "open"]
        # This tool takes protocol and port as two separate arguments.
        ports = [
            ["tcp", str(options.pixiecore_http_port)],
            ["udp", "67"],
            ["udp", "69"],
            ["udp", "4011"],
        ]
        reset_command = ["nixos-firewall-tool", "reset"]
    elif shutil.which("ufw") is not None:
        command_prefix = ["ufw", "allow"]
        reset_command = ["ufw", "reload"]
    elif shutil.which("firewall-cmd") is not None:
        command_prefix = ["firewall-cmd", "--add-port"]
        reset_command = ["firewall-cmd", "--reload"]
    else:
        print(
            f"No firewall tool found. Please make sure that the following ports are open: 67/udp, 69/udp, 4011/udp, and {options.pixiecore_http_port}/tcp",
            file=sys.stderr,
        )
        yield
        return

    try:
        for port in ports:
            subprocess.run([*command_prefix, *port], check=True)
        yield
    finally:
        # check=False: a failing reset is reported here instead of raising,
        # which would otherwise replace an exception from the `with` body.
        if subprocess.run(reset_command, check=False).returncode != 0:
            print(
                "failed to reset firewall rules, see above for details", file=sys.stderr
            )


def die(msg: str) -> NoReturn:
    """Print ``msg`` to stderr and exit with status 1."""
    print(msg, file=sys.stderr)
    sys.exit(1)


def parse_args(args: list[str]) -> Options:
    """Turn command-line arguments into ``Options``.

    The first host address of ``--dhcp-subnet`` becomes the server address.
    The DHCP range starts at the second host address and ends up to 50
    addresses later, or at the last host address of a smaller subnet.
    Unrecognised arguments are collected in ``nixos_anywhere_args``.

    Exits through ``die`` when the subnet is invalid, is not IPv4, or has
    fewer than two host addresses.
    """
    parser = argparse.ArgumentParser(
        description="Note: All arguments not listed here will be passed on to nixos-anywhere (see `nixos-anywhere --help`).",
    )
    parser.add_argument(
        "--flake",
        help="Flake url of nixos configuration to install",
        required=True,
    )
    parser.add_argument(
        "--netboot-image-flake",
        help="Flake url of netboot image to use for PXE boot",
        default="github:nix-community/nixos-images#netboot-installer-nixos-unstable",
    )
    parser.add_argument(
        "--dhcp-subnet",
        help="ipv4 dhcp subnet to use for dhcp",
        default="192.168.35.0/24",
    )
    parser.add_argument(
        "--dhcp-interface",
        help="DHCP network interface name to bind to i.e. eth0",
        required=True,
    )
    parser.add_argument(
        "--pixiecore-http-port",
        help="Port to listen on for HTTP in pixiecore",
        default=64172,
        type=int,
    )
    parser.add_argument(
        "--pause-after-completion",
        help="Whether to wait for user confirmation before tearing down the network setup once the installation completed",
        action="store_true",
    )
    parser.add_argument(
        "--skip-firewall",
        help="Skip opening firewall ports",
        action="store_true",
    )
    parser.add_argument(
        "-i",
        "--private-key",
        help="Path to private key to use for ssh connection to target machine",
        type=Path,
    )

    parsed, unknown_args = parser.parse_known_args(args)
    try:
        dhcp_subnet = ipaddress.ip_network(parsed.dhcp_subnet)
    except ValueError as e:
        die(f"subnet specified in --dhcp-subnet is not valid: {e}")

    if dhcp_subnet.version != 4:
        die(
            "Sorry. Only ipv4 subnets are supported just now because of compatibility with older bios firmware",
        )

    hosts = dhcp_subnet.hosts()
    try:
        dhcp_server_ip = next(hosts)
    except StopIteration:
        die(f"not enough ip addresses found in dhcp-subnet: {parsed.dhcp_subnet}")

    try:
        start_ip = next(hosts)
    except StopIteration:
        die(f"not enough ip addresses found in dhcp-subnet: {parsed.dhcp_subnet}")

    # Advance up to 50 addresses past the start; a smaller subnet simply ends
    # the range at its last host address.
    stop_ip = start_ip
    for _, candidate in zip(range(50), hosts):
        stop_ip = candidate

    return Options(
        flake=parsed.flake,
        skip_firewall=parsed.skip_firewall,
        netboot_image_flake=parsed.netboot_image_flake,
        dhcp_server_ip=dhcp_server_ip,
        dhcp_subnet=dhcp_subnet.prefixlen,
        dhcp_range=(start_ip, stop_ip),
        dhcp_interface=parsed.dhcp_interface,
        pixiecore_http_port=parsed.pixiecore_http_port,
        pause_after_completion=parsed.pause_after_completion,
        nixos_anywhere_args=unknown_args,
        private_key=parsed.private_key,
    )


@dataclass
class SshKey:
    """Paths of an SSH key pair."""

    private_key: Path
    public_key: Path


@contextmanager
def ssh_private_key() -> Iterator[SshKey]:
    """Generate a passphrase-less ed25519 key pair in a temporary directory.

    The directory, and the keys with it, is removed when the ``with`` body
    ends.
    """
    # `prefix` with a trailing dot, like the other temporary directories here.
    with TemporaryDirectory(prefix="ssh-keys.") as _dir:
        temp = Path(_dir)
        private_key = temp / "id_ed25519"
        public_key = temp / "id_ed25519.pub"
        run(["ssh-keygen", "-t", "ed25519", "-f", str(private_key), "-N", ""])
        yield SshKey(private_key=private_key, public_key=public_key)
def nixos_anywhere(
    ip: str,
    flake: str,
    ssh_private_key: Path,
    nixos_anywhere_args: list[str],
) -> None:
    """Run nixos-anywhere against ``ip`` and print a retry hint on failure.

    A non-zero exit status does not raise; the command line is printed so
    the installation can be repeated by hand.

    Args:
        ip: Target address, passed as the positional argument.
        flake: Passed as ``--flake``.
        ssh_private_key: Passed as ``-i``.
        nixos_anywhere_args: Extra arguments appended to the command.
    """
    cmd = [
        # FIXME: path
        "bash",
        NIXOS_ANYWHERE_SH,
        "--flake",
        flake,
        "-L",
        # do not substitute because we do not have internet and copying locally is faster.
        "--no-substitute-on-destination",
        "-i",
        str(ssh_private_key),
        ip,
        *nixos_anywhere_args,
    ]
    rc = run(
        cmd,
        check=False,
    )
    if rc.returncode != 0:
        print(
            "The installation failed, you may run the install command manually again:"
        )
        # `cmd` already contains `-i <key>`; print it shell-quoted so the line
        # can be pasted as-is.
        print(shlex.join(cmd))


@contextmanager
def configure_network_interface(interface: str, ip_addr: str) -> Iterator[None]:
    """Assign ``ip_addr`` to ``interface`` for the duration of the ``with`` body.

    When ``nmcli`` is available the device is set to unmanaged first and back
    to managed afterwards. Existing addresses on the interface are flushed
    before the new one is set; on exit the address is removed again.
    """
    # TODO macos support...
    has_nmcli = shutil.which("nmcli") is not None
    try:
        if has_nmcli:
            # Don't fail execution if networkmanager is not running
            subprocess.run(
                ["nmcli", "device", "set", interface, "managed", "no"],
                check=False,
            )
        # FIXME find a way to avoid this. having multiple ip addresses messes up pixieboot just now.
        run(["ip", "addr", "flush", "dev", interface])
        run(["ip", "addr", "change", str(ip_addr), "dev", interface])
        run(["ip", "link", "set", "dev", interface, "up"])
        try:
            yield
        finally:
            run(["ip", "addr", "del", str(ip_addr), "dev", interface], check=False)
    finally:
        # FIXME: detect if the device was unmanaged before
        if has_nmcli:
            run(["nmcli", "device", "set", interface, "managed", "yes"], check=False)


# FIXME make this just download things?
def build_pxe_image(netboot_image_flake: str) -> Path:
    """Build the netboot image with ``nix build`` and return its output path."""
    res = run(
        ["nix", "build", "--print-out-paths", netboot_image_flake],
        stdout=subprocess.PIPE,
    )
    return Path(res.stdout.strip())


def pause() -> None:
    """Block until the user types ``continue``."""
    print()
    # no clue how to flush stdin with python. Gonna wait for a specific string instead (as opposed to wait for [enter]).
    answer = ""
    while answer != "continue":
        answer = input(
            "Answer 'continue' to terminate this script and tear down the network to the server: ",
        )


def dispatch_dnsmasq(
    dnsmasq: Dnsmasq,
    options: Options,
    ssh_key: SshKey,
    random_hostname: str,
) -> None:
    """Install onto the first DHCP client that reports ``random_hostname``.

    Events are consumed until an ``add``/``old`` event carries the expected
    hostname. nixos-anywhere is then run against that client and the
    function returns, optionally after waiting for user confirmation.
    """
    seen_devices: set[str] = set()
    for event in dnsmasq.next_dhcp_event():
        print(f"{event.action} client (mac: {event.mac_address}, ip: {event.ip_addr})")
        if event.action not in ("add", "old"):
            continue
        if event.hostname != random_hostname:
            print(
                f"ignore client {event.hostname or event.mac_address} != {random_hostname}",
            )
            continue
        if event.mac_address in seen_devices:
            print(f"skip already seen device with mac address {event.mac_address}")
            # Actually skip, as the message says.
            continue
        seen_devices.add(event.mac_address)

        nixos_anywhere(
            event.ip_addr,
            options.flake,
            ssh_key.private_key,
            options.nixos_anywhere_args,
        )
        # to avoid having to reboot physical machines all the time because networking disappears:
        if options.pause_after_completion:
            print("You can connect to the machine by doing:")
            print(f"  ssh -i {ssh_key.private_key} root@{event.ip_addr}")
            pause()
        return


def run_nixos_anywhere(options: Options) -> None:
    """Set up the PXE environment, wait for a client and install onto it.

    The netboot image and the target configuration are built first. The
    interface address, SSH key, firewall ports, pixiecore and dnsmasq are
    then set up in that order on one ``ExitStack``, which tears them down in
    reverse when the function leaves the ``with`` block.
    """
    pxe_image_store_path = build_pxe_image(options.netboot_image_flake)

    random_hostname = f"nixos-pxe-{binascii.b2a_hex(os.urandom(4)).decode('ascii')}"

    subprocess.run(["nixos-rebuild", "build", "--flake", options.flake], check=True)

    with ExitStack() as stack:
        stack.enter_context(
            configure_network_interface(
                options.dhcp_interface,
                f"{options.dhcp_server_ip}/{options.dhcp_subnet}",
            )
        )
        if options.private_key is None:
            ssh_key = stack.enter_context(ssh_private_key())
        else:
            ssh_key = SshKey(
                private_key=options.private_key,
                public_key=options.private_key.with_suffix(".pub"),
            )
        stack.enter_context(open_firewall(options))
        stack.enter_context(
            start_pixiecore(
                options.dhcp_server_ip,
                options.pixiecore_http_port,
                ssh_key.public_key,
                pxe_image_store_path,
                random_hostname,
            )
        )
        try:
            dnsmasq = stack.enter_context(
                start_dnsmasq(options.dhcp_interface, options.dhcp_range)
            )
            print("Waiting for a client to install nixos to. Cancel with Ctrl-C!")
            try:
                dispatch_dnsmasq(dnsmasq, options, ssh_key, random_hostname)
            except KeyboardInterrupt:
                print("terminating...")
        except Exception as e:
            print(f"error: {e}")
            raise


# switch to https://pojntfx.github.io/go-isc-dhcp/ ?
def main(args: list[str] | None = None) -> None:
    """Command-line entry point.

    Args:
        args: Arguments to parse; ``sys.argv[1:]`` is read at call time when
            omitted.
    """
    # Resolved here rather than in the signature, where it would be
    # evaluated once at import time.
    if args is None:
        args = sys.argv[1:]
    options = parse_args(args)
    if os.geteuid() != 0:
        die("You need to have root privileges to run this script. Exiting.")
    run_nixos_anywhere(options)


if __name__ == "__main__":
    main()
${src} ./src + chmod +w -R ./src + cd src + ${checkPython}/bin/mypy . + touch $out + ''; + +in +package diff --git a/test-ipxe-vm.sh b/test-ipxe-vm.sh new file mode 100755 index 00000000..d3f28c36 --- /dev/null +++ b/test-ipxe-vm.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +set -x -eu -o pipefail + +VM_IF="nixos-if0" +BRIDGE_IF="nixos-br0" +VM_IMAGE="nixos-nvme.img" + +extra_flags=() +if [[ -n ${OVMF-} ]]; then + extra_flags+=("-bios" "$OVMF") +fi + +for arg in "${@}"; do + case "$arg" in + prepare) + sudo ip tuntap add "$VM_IF" mode tap user "$(id -un)" + sudo ip link set dev "$VM_IF" up + sudo ip link add "$BRIDGE_IF" type bridge + sudo ip link set "$VM_IF" master "$BRIDGE_IF" + truncate -s10G "$VM_IMAGE" + ;; + start) + qemu-system-x86_64 -m 4G \ + -boot n \ + -smp "$(nproc)" \ + -enable-kvm \ + -cpu max \ + -netdev tap,id=mynet0,ifname="$VM_IF",script=no,downscript=no \ + -device e1000,netdev=mynet0,mac=52:55:00:d1:55:01 \ + -drive file="$VM_IMAGE",if=none,id=nvm,format=raw \ + -device nvme,serial=deadbeef,drive=nvm \ + -nographic \ + "${extra_flags[@]}" + ;; + destroy) + sudo ip link del "$VM_IF" || true + sudo ip link del "$BRIDGE_IF" || true + rm -f "$VM_IMAGE" + ;; + *) + echo "USAGE: $0 (prepare|start|destroy)" + ;; + esac +done diff --git a/treefmt/flake-module.nix b/treefmt/flake-module.nix index 3444f248..15af1c29 100644 --- a/treefmt/flake-module.nix +++ b/treefmt/flake-module.nix @@ -2,7 +2,7 @@ imports = [ inputs.treefmt-nix.flakeModule ]; - perSystem = { config, pkgs, ... }: { + perSystem = { config, lib, pkgs, ... }: { treefmt = { projectRootFile = "flake.nix"; programs.mdsh.enable = true; @@ -10,6 +10,8 @@ programs.shellcheck.enable = true; programs.shfmt.enable = true; programs.deno.enable = !pkgs.deno.meta.broken; + programs.ruff.format = true; + programs.ruff.check = true; settings.formatter.shellcheck.options = [ "-s" "bash" ]; }; formatter = config.treefmt.build.wrapper;