# NixOS module: AMD XDNA 2 NPU support.
#
# Turns on the amdxdna kernel driver and installs the XRT userspace runtime
# (libxrt_coreutil plus the XDNA shim plugin) built from packages/xrt.
#
# Prerequisites (as recorded by the module author):
#   * Linux >= 6.14 (amdxdna in-tree) OR linux-firmware >= 20260221 for the
#     NPU firmware blobs.  CachyOS kernels >= 6.14 satisfy this.
#   * AMD XDNA 2 NPU silicon (Strix Point, Strix Halo, Kraken Point,
#     Gorgon Point — Ryzen AI 300-series and later).
#
# Effects when `enable = true`:
#   1. Adds xrt (libxrt_coreutil, libxrt_driver_xdna, xrt-smi) from the local
#      flake package set to the system packages.
#   2. Requests that the amdxdna kernel module be loaded at boot.
#   3. Raises the memlock limit to unlimited, both for PAM login sessions and
#      as the systemd manager default (required for NPU DMA buffers).
#   4. Installs a udev rule giving the "render" group access to accel devices.
#   5. Adds pkgs.linux-firmware to hardware.firmware.
#   6. Optionally installs fastflowlm system-wide.
#
# Usage (NixOS config):
#   ${namespace}.hardware.npu.enable = true;
#   # Enable FLM system-wide if you also run the lemonade service:
#   ${namespace}.hardware.npu.fastflowlm.enable = true;
{
  lib,
  pkgs,
  config,
  namespace,
  ...
}:
let
  inherit (lib) mkEnableOption mkIf optionals;
  inherit (lib.${namespace}) mkBoolOpt;

  cfg = config.${namespace}.hardware.npu;

  # Package set provided by this flake under its namespace.
  flakePkgs = pkgs.${namespace};

  # XRT is always installed; FastFlowLM only when its sub-option is enabled.
  npuPackages = [ flakePkgs.xrt ] ++ optionals cfg.fastflowlm.enable [ flakePkgs.fastflowlm ];

  # Unlimited memlock for every PAM domain; type "-" sets soft and hard limits.
  memlockLimit = {
    domain = "*";
    type = "-";
    item = "memlock";
    value = "unlimited";
  };

  # udev rule text; the leading "#" line is part of the emitted rules file.
  accelUdevRules = ''
    # AMD XDNA 2 NPU — grant access to the render group
    SUBSYSTEM=="accel", KERNEL=="accel*", GROUP="render", MODE="0660"
  '';
in
{
  options.${namespace}.hardware.npu = {
    enable = mkEnableOption "AMD XDNA 2 NPU support (XRT + amdxdna driver)";

    fastflowlm.enable = mkBoolOpt false ''
      Install FastFlowLM (flm) system-wide.
      FastFlowLM runs LLMs directly on the AMD XDNA 2 NPU.  Enable this when
      you also run the lemonade service with an NPU backend, or want standalone
      `flm` access.
    '';
  };

  config = mkIf cfg.enable {
    # Fail evaluation early on non-x86_64 hosts.
    assertions = [
      {
        assertion = pkgs.stdenv.hostPlatform.isx86_64;
        message = "${namespace}.hardware.npu: AMD XDNA NPU support is only available on x86_64-linux.";
      }
    ];

    # -- Kernel driver -------------------------------------------------------
    # amdxdna ships with Linux 6.14 and later.  Per the original author's note,
    # on older kernels (e.g. CachyOS 6.12/6.13) this load request picks up an
    # out-of-tree build of the module if one is installed, and is harmless when
    # the driver is already part of the kernel.
    boot.kernelModules = [ "amdxdna" ];

    # -- XRT userspace runtime (and optional FastFlowLM) ---------------------
    environment.systemPackages = npuPackages;

    # -- Memlock limit -------------------------------------------------------
    # NPU workloads lock large memory regions for DMA; without an unlimited
    # memlock limit the NPU will refuse to allocate buffers.
    security.pam.loginLimits = [ memlockLimit ];

    # PAM limits do not reach services started by systemd (e.g. lemonade,
    # fastflowlm), so also make unlimited memlock the manager-wide default.
    # Individual units can still override it with their own LimitMEMLOCK.
    systemd.settings.Manager.DefaultLimitMEMLOCK = "infinity";

    # -- NPU device permissions ----------------------------------------------
    # amdxdna exposes the NPU as /dev/accel/accel0 (DRM accelerator device).
    # The rule lets members of the "render" group open it without root.
    services.udev.extraRules = accelUdevRules;

    # Make sure the group referenced by the udev rule exists.
    users.groups.render = { };

    # -- Firmware ------------------------------------------------------------
    # The NPU firmware blobs ship in linux-firmware >= 20260221.
    # hardware.enableAllFirmware (set by modules/nixos/hardware/common) already
    # pulls in the full firmware set; listing it here is deliberate redundancy.
    hardware.firmware = [ pkgs.linux-firmware ];
  };
}