From ab81e78b60ed0eb897377f1583973f5bc53926ba Mon Sep 17 00:00:00 2001 From: mjallen18 Date: Wed, 25 Mar 2026 20:46:42 -0500 Subject: [PATCH] init xrt and fflm --- modules/nixos/services/grafana/default.nix | 57 ++--- packages/fastflowlm/default.nix | 248 +++++++++++++++++++++ packages/xrt/default.nix | 114 ++++++++++ systems/x86_64-linux/jallen-nas/sops.nix | 13 ++ 4 files changed, 404 insertions(+), 28 deletions(-) create mode 100644 packages/fastflowlm/default.nix create mode 100644 packages/xrt/default.nix diff --git a/modules/nixos/services/grafana/default.nix b/modules/nixos/services/grafana/default.nix index fce4e57..1ff2a57 100755 --- a/modules/nixos/services/grafana/default.nix +++ b/modules/nixos/services/grafana/default.nix @@ -400,34 +400,30 @@ let # ntfy via the Grafana webhook contact point. Grafana POSTs a JSON # body; ntfy accepts any body as the message text. We use the # message template below to format it nicely. - # Basic auth credentials are read from the SOPS secret at runtime - # via Grafana's $__file{} provider. - contactPoints.settings = { - apiVersion = 1; - contactPoints = [ - { - name = "ntfy"; - receivers = [ - { - uid = "ntfy-webhook"; - type = "webhook"; - settings = { - url = "https://ntfy.mjallen.dev/grafana-alerts"; - httpMethod = "POST"; - username = "$__file{${config.sops.secrets."jallen-nas/ntfy/user".path}}"; - password = "$__file{${config.sops.secrets."jallen-nas/ntfy/password".path}}"; - # Pass alert title and state as ntfy headers via the - # custom message template (defined below). - httpHeaders = { - "Tags" = "chart,bell"; - }; - }; - disableResolveMessage = false; - } - ]; - } - ]; - }; + # + # Credentials are injected via Grafana's $__env{} provider, which + # reads from the process environment. The GRAFANA_NTFY_USER and + # GRAFANA_NTFY_PASSWORD variables are set via the SOPS-managed + # grafana.env EnvironmentFile on the grafana.service unit. 
+ # + # Note: $__file{} only works in grafana.ini settings, not in + # provisioning YAML files — using it here causes a parse error. + contactPoints.path = pkgs.writeTextDir "contactPoints.yaml" '' + apiVersion: 1 + contactPoints: + - name: ntfy + receivers: + - uid: ntfy-webhook + type: webhook + disableResolveMessage: false + settings: + url: https://ntfy.mjallen.dev/grafana-alerts + httpMethod: POST + username: $__env{GRAFANA_NTFY_USER} + password: $__env{GRAFANA_NTFY_PASSWORD} + httpHeaders: + Tags: "chart,bell" + ''; # ── Notification message template ─────────────────────────────────── # Grafana sends the rendered template body as the POST body. @@ -878,6 +874,11 @@ let }; }; + # Inject ntfy credentials into Grafana's environment so the $__env{} + # provider in contactPoints.yaml can resolve them at runtime. + # The grafana.env template is managed by SOPS and owned by grafana:grafana. + systemd.services.grafana.serviceConfig.EnvironmentFile = config.sops.templates."grafana.env".path; + # The redis exporter needs AF_INET to reach TCP Redis instances. # The default systemd hardening only allows AF_UNIX. systemd.services.prometheus-redis-exporter.serviceConfig.RestrictAddressFamilies = [ diff --git a/packages/fastflowlm/default.nix b/packages/fastflowlm/default.nix new file mode 100644 index 0000000..f76d415 --- /dev/null +++ b/packages/fastflowlm/default.nix @@ -0,0 +1,248 @@ +{ + lib, + stdenv, + fetchFromGitHub, + cmake, + ninja, + pkg-config, + rustPlatform, + cargo, + rustc, + # C++ build-time dependencies + boost, + curl, + openssl, + fftw, + fftwFloat, # fftw3f (single-precision) + fftwLongDouble, # fftw3l (long-double-precision) + ffmpeg, + readline, + libdrm, + libuuid, + # ELF patching for the bundled proprietary .so files + autoPatchelfHook, + patchelf, + gcc-unwrapped, + # Access to other flake packages (packages/xrt) + pkgs, + namespace, +}: + +# FastFlowLM (FLM) — Ollama-style LLM runtime for AMD Ryzen AI (XDNA 2) NPUs. 
+# +# Build overview +# ============== +# The repository contains: +# src/ C++20 CMake project → produces the `flm` binary +# third_party/ +# tokenizers-cpp/ git submodule — builds tokenizers_cpp (C++) + +# libtokenizers_c.a (Rust staticlib via cargo) +# src/lib/*.so Proprietary NPU kernel libraries (pre-built, bundled) +# src/xclbins/ AIE bitstreams (pre-built, loaded at runtime by .so) +# src/model_list.json Model registry +# +# Runtime prerequisites (managed outside this package): +# • Linux >= 6.14 with amdxdna in-tree driver, or amdxdna-dkms on older +# kernels +# • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0) +# • Memlock = unlimited for the FLM process +# • packages/xrt (libxrt_coreutil) built and available +# +# To update to a new release +# ========================== +# 1. Bump `version` below. +# 2. Update the `hash` of `src` (run: nix-prefetch-git --url ...FastFlowLM --rev vX.Y.Z). +# 3. If the tokenizers-cpp submodule rev changed (check .gitmodules / git +# submodule status), update `tokenizersRev` and the `hash` of `tokenizers-cpp-src`: +# nix-prefetch-git --url .../tokenizers-cpp --rev REV --fetch-submodules +# 4. Update the `hash` of `cargoVendorDir`: set to lib.fakeHash, run nix build, copy hash. + +let + version = "0.9.36"; + + # XRT userspace runtime — built from packages/xrt in this flake. + xrt = pkgs.${namespace}.xrt; + + # ── tokenizers-cpp submodule ────────────────────────────────────────────── + # Pinned to the submodule commit recorded by FastFlowLM v0.9.36. + tokenizersRev = "34885cfd7b9ef27b859c28a41e71413dd31926f5"; + + tokenizers-cpp-src = fetchFromGitHub { + owner = "mlc-ai"; + repo = "tokenizers-cpp"; + rev = tokenizersRev; + # Includes sentencepiece + msgpack sub-submodules. + hash = "sha256-m3A9OhCXJgvvV9UbVL/ijaUC1zkLHlddnQLqZEA5t4w="; + fetchSubmodules = true; + }; + + # Vendor the Rust crates from tokenizers-cpp/rust/Cargo.toml offline. + # This fixed-output derivation has network access; everything else is sandboxed.
+ # To compute the hash: set to lib.fakeHash → nix build → copy printed hash. + cargoVendorDir = rustPlatform.fetchCargoVendor { + src = tokenizers-cpp-src; + sourceRoot = "source/rust"; + hash = lib.fakeHash; # FIXME: replace after first successful build attempt + }; + +in +stdenv.mkDerivation rec { + pname = "fastflowlm"; + inherit version; + + src = fetchFromGitHub { + owner = "FastFlowLM"; + repo = "FastFlowLM"; + rev = "v${version}"; + # We do NOT fetch submodules here — tokenizers-cpp is injected separately + # (above) so that its Rust deps can be vendored in a fixed-output derivation. + hash = "sha256-uq/ZxvJA5HTJbMxofO4Hrz7ULvV1fPC7OHRXulMqwqw="; + }; + + nativeBuildInputs = [ + cmake + ninja + pkg-config + cargo + rustc + autoPatchelfHook + patchelf + ]; + + buildInputs = [ + boost + curl + openssl + fftw + fftwFloat + fftwLongDouble + ffmpeg + readline + libdrm + libuuid + xrt + # libstdc++ / libgcc_s needed at runtime by the bundled NPU .so files. + gcc-unwrapped.lib + ]; + + # autoPatchelfHook appends the lib dirs of runtimeDependencies to the RUNPATH + # it writes, covering libraries that are dlopen()ed rather than listed as NEEDED. + runtimeDependencies = [ + xrt + gcc-unwrapped.lib + fftw + fftwFloat + fftwLongDouble + ffmpeg + curl + openssl + boost + readline + libdrm + ]; + + # CMakeLists.txt lives in src/, not the repo root; cmakeDir is resolved from the build/ directory the cmake hook cds into, hence the leading "../". + cmakeDir = "../src"; + + preConfigure = '' + # ── 1. Populate the tokenizers-cpp submodule directory ─────────────────── + # CMakeLists.txt references the submodule as: + # add_subdirectory(''${CMAKE_SOURCE_DIR}/../third_party/tokenizers-cpp ...) + # The unpack phase extracts the sources to $TMPDIR/source and preConfigure runs + # from that directory, so the submodule content lands there before cmake is invoked. + mkdir -p third_party/tokenizers-cpp + cp -r --no-preserve=mode,ownership "${tokenizers-cpp-src}/." \ + third_party/tokenizers-cpp/ + + # ── 2.
Configure cargo to use the pre-vendored crates (offline) ────────── + mkdir -p third_party/tokenizers-cpp/rust/.cargo + cat > third_party/tokenizers-cpp/rust/.cargo/config.toml << EOF + [source.crates-io] + replace-with = "vendored-sources" + + [source.vendored-sources] + directory = "${cargoVendorDir}" + EOF + ''; + + cmakeFlags = [ + # The build system requires these two version strings (checked at configure). + "-DFLM_VERSION=${version}" + "-DNPU_VERSION=32.0.203.311" + "-DCMAKE_BUILD_TYPE=Release" + # Override the default XRT install prefix (/opt/xilinx/xrt). + "-DXRT_INCLUDE_DIR=${xrt}/include" + "-DXRT_LIB_DIR=${xrt}/lib" + # xclbins/ path baked into the binary via CMAKE_XCLBIN_PREFIX. + "-DCMAKE_XCLBIN_PREFIX=${placeholder "out"}/share/flm" + ]; + + installPhase = '' + runHook preInstall + + cmake --install . --prefix "$out" + + # ── Copy bundled proprietary NPU kernel .so files ───────────────────────── + # The upstream CMakeLists installs them via: + # file(GLOB so_libs "''${CMAKE_SOURCE_DIR}/lib/*.so") + # install(FILES ''${so_libs} DESTINATION lib) + # and sets RPATH=$ORIGIN/../lib on the flm binary. + # We reproduce that layout: $out/lib/lib*.so alongside $out/bin/flm. + mkdir -p "$out/lib" + for so in "$src/src/lib"/lib*.so; do + install -m755 "$so" "$out/lib/" + done + + runHook postInstall + ''; + + # autoPatchelfHook runs automatically and patches the bundled .so files. + # We additionally fix the RPATH on the flm binary to include both: + # • $out/lib (bundled NPU .so files) + # • system libs path (XRT, ffmpeg, boost, …) + postFixup = '' + patchelf \ + --set-rpath "${lib.makeLibraryPath buildInputs}:$out/lib" \ + "$out/bin/flm" + ''; + + meta = with lib; { + description = "LLM runtime for AMD Ryzen AI XDNA 2 NPUs"; + longDescription = '' + FastFlowLM (FLM) runs large language models on AMD Ryzen AI (XDNA 2) + NPU silicon — Strix Point, Strix Halo, Kraken Point, Gorgon Point. + It provides an Ollama-compatible REST API (port 52625) and a CLI. 
+ + Models are stored in ~/.config/flm/ by default; + override with the FLM_MODEL_PATH environment variable. + + Usage: + flm validate # check NPU driver + firmware health + flm run llama3.2:1b # interactive chat (downloads model on first run) + flm serve llama3.2:1b # OpenAI-compatible server on port 52625 + flm list # list available models + flm pull # pre-download a model + + System requirements: + • Linux >= 6.14 (amdxdna in-tree) or amdxdna-dkms on older kernels + • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0) + • Unlimited memlock for the flm process, e.g. in NixOS: + security.pam.loginLimits = [{ + domain = "*"; type = "-"; + item = "memlock"; value = "unlimited"; + }]; + + License note: CLI/orchestration code is MIT. The bundled NPU kernel + shared libraries are proprietary (free for commercial use up to + USD 10 M annual revenue). See LICENSE_BINARY.txt upstream. + ''; + homepage = "https://fastflowlm.com"; + license = with licenses; [ + mit + unfreeRedistributable + ]; + mainProgram = "flm"; + platforms = [ "x86_64-linux" ]; + maintainers = [ ]; + }; +} diff --git a/packages/xrt/default.nix b/packages/xrt/default.nix new file mode 100644 index 0000000..b154357 --- /dev/null +++ b/packages/xrt/default.nix @@ -0,0 +1,114 @@ +{ + lib, + stdenv, + fetchFromGitHub, + cmake, + ninja, + pkg-config, + python3, + boost, + curl, + openssl, + systemd, + libdrm, + ncurses, + protobuf, + elfutils, + zlib, + rapidjson, + util-linux, # provides libuuid + xz, # provides liblzma +}: + +# AMD XRT (Xilinx Runtime) userspace library for NPU (XDNA 2) devices. +# +# This package builds the XRT base library from the commit pinned as a +# submodule in amd/xdna-driver. 
It provides: +# $out/lib/libxrt_coreutil.so — core utility library (linked by flm) +# $out/lib/libxrt_core.so — platform-independent core +# $out/include/xrt/ — public C++ headers +# $out/include/experimental/ +# +# The xrt source tree lives under the src/ subdirectory of the Xilinx/XRT +# repository (see src/CMakeLists.txt which includes CMake/nativeLnx.cmake). +# +# XRT version 2.19.0 — pinned to the commit used by amd/xdna-driver main +# as of 2026-03-25 (xrt @ 481583d). +# +# Runtime note: this package only provides the userspace library. The +# kernel driver (amdxdna.ko) is a separate concern: +# • Linux >= 6.14 ships it in-tree (boot.kernelPackages.linux_latest). +# • Older kernels can use hardware.amdxdna.enable (once packaged). + +stdenv.mkDerivation rec { + pname = "xrt"; + version = "2.19.0"; + + src = fetchFromGitHub { + owner = "Xilinx"; + repo = "XRT"; + rev = "481583db9a26cb506a37cab7f1881ae7c7de2f32"; + hash = "sha256-WLZDjuuEGd3i77zXpAJkfQy/AszdSQ9pagy64yGX58Q="; + fetchSubmodules = false; # XRT submodules are Windows-only tools + }; + + nativeBuildInputs = [ + cmake + ninja + pkg-config + python3 + ]; + + buildInputs = [ + boost + curl + openssl + systemd # for libudev (device enumeration) + libdrm + ncurses + protobuf + elfutils # libelf + zlib + rapidjson + util-linux # libuuid + xz # liblzma + ]; + + # XRT's CMakeLists.txt is in the src/ subdirectory; cmakeDir is resolved from the build/ directory the cmake hook cds into, hence the leading "../". + cmakeDir = "../src"; + + cmakeFlags = [ + "-DCMAKE_BUILD_TYPE=Release" + "-DCMAKE_INSTALL_PREFIX=${placeholder "out"}" + # Build the NPU/XDNA variant (skips PCIe FPGA-specific components). + "-DXRT_NATIVE_BUILD=yes" + # Do not fail the build on compiler warnings (XRT_ENABLE_WERROR is presumably -Werror; confirm in XRT's CMake files): + "-DXRT_ENABLE_WERROR=OFF" + # Install libraries to lib/ (some builds default to lib64/). + "-DCMAKE_INSTALL_LIBDIR=lib" + ]; + + # XRT's install target places a setup.sh in the prefix root; we don't need + # that for Nix — the binary wrapper / RPATH mechanism handles library lookup.
+ postInstall = '' + # Remove the CMake-generated setup.sh — not needed in a Nix env. + rm -f "$out"/setup.sh "$out"/setup.csh 2>/dev/null || true + ''; + + meta = with lib; { + description = "AMD XRT (Xilinx Runtime) userspace library for XDNA NPUs"; + longDescription = '' + XRT is the userspace component of AMD's XRT stack for their FPGA and + NPU devices. This package builds only the base library + (libxrt_coreutil, libxrt_core) that FastFlowLM links against to + communicate with the AMD XDNA 2 NPU via the amdxdna kernel driver. + + The kernel driver (amdxdna.ko) is built in since Linux 6.14. + For older kernels it can be loaded via a DKMS package. + ''; + homepage = "https://github.com/Xilinx/XRT"; + license = licenses.asl20; + platforms = [ "x86_64-linux" ]; + maintainers = [ ]; + }; +} diff --git a/systems/x86_64-linux/jallen-nas/sops.nix b/systems/x86_64-linux/jallen-nas/sops.nix index 3b25bb8..8d50f9e 100755 --- a/systems/x86_64-linux/jallen-nas/sops.nix +++ b/systems/x86_64-linux/jallen-nas/sops.nix @@ -366,6 +366,19 @@ in ]; }; + # Grafana reads ntfy credentials via systemd EnvironmentFile so the + # $__env{} provider works in alerting provisioning YAML. The file + # provider ($__file{}) only works in grafana.ini, not in provisioning. + "grafana.env" = { + content = '' + GRAFANA_NTFY_USER=${config.sops.placeholder."jallen-nas/ntfy/user"} + GRAFANA_NTFY_PASSWORD=${config.sops.placeholder."jallen-nas/ntfy/password"} + ''; + mode = "0400"; + owner = "grafana"; + restartUnits = [ "grafana.service" ]; + }; + # CrowdSec HTTP notification plugin config with credentials baked in. # The plugin process spawned by crowdsec/cscli reads this file directly. # Credentials are embedded in the URL using HTTP basic auth so no