init xrt and flm (FastFlowLM)

This commit is contained in:
mjallen18
2026-03-25 20:46:42 -05:00
parent 2013804b17
commit ab81e78b60
4 changed files with 404 additions and 28 deletions

View File

@@ -0,0 +1,248 @@
{
lib,
stdenv,
fetchFromGitHub,
cmake,
ninja,
pkg-config,
rustPlatform,
cargo,
rustc,
# C++ build-time dependencies
boost,
curl,
openssl,
fftw,
fftwFloat, # fftw3f (single-precision)
fftwLongDouble, # fftw3l (long-double-precision)
ffmpeg,
readline,
libdrm,
libuuid,
# ELF patching for the bundled proprietary .so files
autoPatchelfHook,
patchelf,
gcc-unwrapped,
# Access to other flake packages (packages/xrt)
pkgs,
namespace,
}:
# FastFlowLM (FLM) — Ollama-style LLM runtime for AMD Ryzen AI (XDNA 2) NPUs.
#
# Build overview
# ==============
# The repository contains:
# src/ C++20 CMake project → produces the `flm` binary
# third_party/
# tokenizers-cpp/ git submodule — builds tokenizers_cpp (C++) +
# libtokenizers_c.a (Rust staticlib via cargo)
# src/lib/*.so Proprietary NPU kernel libraries (pre-built, bundled)
# src/xclbins/ AIE bitstreams (pre-built, loaded at runtime by .so)
# src/model_list.json Model registry
#
# Runtime prerequisites (managed outside this package):
# • Linux >= 6.14 with amdxdna in-tree driver, or amdxdna-dkms on older
# kernels
# • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
# • Memlock = unlimited for the FLM process
# • packages/xrt (libxrt_coreutil) built and available
#
# To update to a new release
# ==========================
# 1. Bump `version` below.
# 2. Update `srcHash` (run: nix-prefetch-git --url ...FastFlowLM --rev v<X>).
# 3. If the tokenizers-cpp submodule rev changed (check .gitmodules / git
# submodule status), update `tokenizersRev` and `tokenizersHash`:
# nix-prefetch-git --url .../tokenizers-cpp --rev <REV> --fetch-submodules
# 4. Update `cargoVendorHash`: set to lib.fakeHash, run nix build, copy hash.
let
version = "0.9.36";
# XRT userspace runtime — built from packages/xrt in this flake.
xrt = pkgs.${namespace}.xrt;
# ── tokenizers-cpp submodule ──────────────────────────────────────────────
# Pinned to the commit referenced in FastFlowLM v0.9.36 .gitmodules.
tokenizersRev = "34885cfd7b9ef27b859c28a41e71413dd31926f5";
tokenizers-cpp-src = fetchFromGitHub {
owner = "mlc-ai";
repo = "tokenizers-cpp";
rev = tokenizersRev;
# Includes sentencepiece + msgpack sub-submodules.
hash = "sha256-m3A9OhCXJgvvV9UbVL/ijaUC1zkLHlddnQLqZEA5t4w=";
fetchSubmodules = true;
};
# Vendor the Rust crates from tokenizers-cpp/rust/Cargo.toml offline.
# This fixed-output derivation has network access; everything else is sandboxed.
# To compute the hash: set to lib.fakeHash → nix build → copy printed hash.
cargoVendorDir = rustPlatform.fetchCargoVendor {
src = tokenizers-cpp-src;
sourceRoot = "source/rust";
hash = lib.fakeHash; # FIXME: replace after first successful build attempt
};
in
# `rec` so attribute bodies below can reference `version` (src.rev, cmakeFlags).
stdenv.mkDerivation rec {
pname = "fastflowlm";
inherit version;
src = fetchFromGitHub {
owner = "FastFlowLM";
repo = "FastFlowLM";
rev = "v${version}";
# We do NOT fetch submodules here — tokenizers-cpp is injected separately
# (above) so that its Rust deps can be vendored in a fixed-output derivation.
hash = "sha256-uq/ZxvJA5HTJbMxofO4Hrz7ULvV1fPC7OHRXulMqwqw=";
};
nativeBuildInputs = [
cmake
ninja
pkg-config
cargo
rustc
autoPatchelfHook
patchelf
];
buildInputs = [
boost
curl
openssl
fftw
fftwFloat
fftwLongDouble
ffmpeg
readline
libdrm
libuuid
xrt
# libstdc++ / libgcc_s needed at runtime by the bundled NPU .so files.
gcc-unwrapped.lib
];
# autoPatchelfHook uses runtimeDependencies to add NEEDED entries to the
# ELF RPATH, covering libraries that the bundled .so files depend on.
# NOTE(review): entries are bare derivations; autoPatchelfHook resolves them
# via their lib output — confirm the pinned nixpkgs supports this form.
runtimeDependencies = [
xrt
gcc-unwrapped.lib
fftw
fftwFloat
fftwLongDouble
ffmpeg
curl
openssl
boost
readline
libdrm
];
# CMakeLists.txt lives in src/, not the repo root.
cmakeDir = "src";
# Runs before the cmake setup hook configures; working directory is the
# unpacked source root. `''${` below is a Nix escape for a literal `${`.
preConfigure = ''
# 1. Populate the tokenizers-cpp submodule directory
# CMakeLists.txt references the submodule as:
# add_subdirectory(''${CMAKE_SOURCE_DIR}/../third_party/tokenizers-cpp ...)
# The cmake setup hook unpacks sources to $TMPDIR/source; we write the
# submodule content there before cmake is invoked.
mkdir -p third_party/tokenizers-cpp
cp -r --no-preserve=mode,ownership "${tokenizers-cpp-src}/." \
third_party/tokenizers-cpp/
# 2. Configure cargo to use the pre-vendored crates (offline)
mkdir -p third_party/tokenizers-cpp/rust/.cargo
cat > third_party/tokenizers-cpp/rust/.cargo/config.toml << EOF
[source.crates-io]
replace-with = "vendored-sources"
[source.vendored-sources]
directory = "${cargoVendorDir}"
EOF
'';
cmakeFlags = [
# The build system requires these two version strings (checked at configure).
"-DFLM_VERSION=${version}"
"-DNPU_VERSION=32.0.203.311"
"-DCMAKE_BUILD_TYPE=Release"
# Override the default XRT install prefix (/opt/xilinx/xrt).
"-DXRT_INCLUDE_DIR=${xrt}/include"
"-DXRT_LIB_DIR=${xrt}/lib"
# xclbins/ path baked into the binary via CMAKE_XCLBIN_PREFIX.
"-DCMAKE_XCLBIN_PREFIX=${placeholder "out"}/share/flm"
];
# Custom installPhase: `cmake --install` for the CMake-managed artifacts,
# plus a manual copy of the pre-built proprietary NPU kernel libraries
# straight from the (read-only) fetched source tree.
installPhase = ''
runHook preInstall
cmake --install . --prefix "$out"
# Copy bundled proprietary NPU kernel .so files
# The upstream CMakeLists installs them via:
# file(GLOB so_libs "''${CMAKE_SOURCE_DIR}/lib/*.so")
# install(FILES ''${so_libs} DESTINATION lib)
# and sets RPATH=$ORIGIN/../lib on the flm binary.
# We reproduce that layout: $out/lib/lib*.so alongside $out/bin/flm.
mkdir -p "$out/lib"
for so in "$src/src/lib"/lib*.so; do
install -m755 "$so" "$out/lib/"
done
runHook postInstall
'';
# autoPatchelfHook runs automatically and patches the bundled .so files.
# We additionally fix the RPATH on the flm binary to include both:
# • $out/lib (bundled NPU .so files)
# • system libs path (XRT, ffmpeg, boost, …)
postFixup = ''
patchelf \
--set-rpath "${lib.makeLibraryPath buildInputs}:$out/lib" \
"$out/bin/flm"
'';
meta = with lib; {
description = "LLM runtime for AMD Ryzen AI XDNA 2 NPUs";
longDescription = ''
FastFlowLM (FLM) runs large language models on AMD Ryzen AI (XDNA 2)
NPU silicon Strix Point, Strix Halo, Kraken Point, Gorgon Point.
It provides an Ollama-compatible REST API (port 52625) and a CLI.
Models are stored in ~/.config/flm/ by default;
override with the FLM_MODEL_PATH environment variable.
Usage:
flm validate # check NPU driver + firmware health
flm run llama3.2:1b # interactive chat (downloads model on first run)
flm serve llama3.2:1b # OpenAI-compatible server on port 52625
flm list # list available models
flm pull <model> # pre-download a model
System requirements:
Linux >= 6.14 (amdxdna in-tree) or amdxdna-dkms on older kernels
linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
Unlimited memlock for the flm process, e.g. in NixOS:
security.pam.loginLimits = [{
domain = "*"; type = "-";
item = "memlock"; value = "unlimited";
}];
License note: CLI/orchestration code is MIT. The bundled NPU kernel
shared libraries are proprietary (free for commercial use up to
USD 10 M annual revenue). See LICENSE_BINARY.txt upstream.
'';
homepage = "https://fastflowlm.com";
# Dual-listed: MIT for the CLI sources, unfree-redistributable for the
# bundled NPU kernel .so files (see longDescription).
license = with licenses; [
mit
unfreeRedistributable
];
mainProgram = "flm";
platforms = [ "x86_64-linux" ];
maintainers = [ ];
};
}

114
packages/xrt/default.nix Normal file
View File

@@ -0,0 +1,114 @@
{
lib,
stdenv,
fetchFromGitHub,
cmake,
ninja,
pkg-config,
python3,
boost,
curl,
openssl,
systemd,
libdrm,
ncurses,
protobuf,
elfutils,
zlib,
rapidjson,
util-linux, # provides libuuid
xz, # provides liblzma
}:
# AMD XRT (Xilinx Runtime) userspace library for NPU (XDNA 2) devices.
#
# This package builds the XRT base library from the commit pinned as a
# submodule in amd/xdna-driver. It provides:
# $out/lib/libxrt_coreutil.so — core utility library (linked by flm)
# $out/lib/libxrt_core.so — platform-independent core
# $out/include/xrt/ — public C++ headers
# $out/include/experimental/
#
# The xrt source tree lives under the src/ subdirectory of the Xilinx/XRT
# repository (see src/CMakeLists.txt which includes CMake/nativeLnx.cmake).
#
# XRT version 2.19.0 — pinned to the commit used by amd/xdna-driver main
# as of 2026-03-25 (xrt @ 481583d).
#
# Runtime note: this package only provides the userspace library. The
# kernel driver (amdxdna.ko) is a separate concern:
# • Linux >= 6.14 ships it in-tree (boot.kernelPackages.linux_latest).
# • Older kernels can use hardware.amdxdna.enable (once packaged).
#
# No `rec`: no attribute below references pname/version, so a plain set
# keeps evaluation simpler and avoids accidental self-reference.
stdenv.mkDerivation {
pname = "xrt";
version = "2.19.0";
src = fetchFromGitHub {
owner = "Xilinx";
repo = "XRT";
# Commit pinned by amd/xdna-driver (see header comment); update the
# version string above in lockstep when bumping this rev.
rev = "481583db9a26cb506a37cab7f1881ae7c7de2f32";
hash = "sha256-WLZDjuuEGd3i77zXpAJkfQy/AszdSQ9pagy64yGX58Q=";
fetchSubmodules = false; # XRT submodules are Windows-only tools
};
nativeBuildInputs = [
cmake
ninja
pkg-config
python3
];
buildInputs = [
boost
curl
openssl
systemd # for libudev (device enumeration)
libdrm
ncurses
protobuf
elfutils # libelf
zlib
rapidjson
util-linux # libuuid
xz # liblzma
];
# XRT's CMakeLists.txt is in the src/ subdirectory.
cmakeDir = "src";
# Note: the nixpkgs cmake setup hook already passes
# -DCMAKE_INSTALL_PREFIX=$out, so it is not repeated here.
cmakeFlags = [
"-DCMAKE_BUILD_TYPE=Release"
# Build the NPU/XDNA variant (skips PCIe FPGA-specific components).
"-DXRT_NATIVE_BUILD=yes"
# Disable components we do not need:
"-DXRT_ENABLE_WERROR=OFF"
# Install libraries to lib/ (some builds default to lib64/).
"-DCMAKE_INSTALL_LIBDIR=lib"
];
# XRT's install target places a setup.sh in the prefix root; we don't need
# that for Nix — the binary wrapper / RPATH mechanism handles library lookup.
postInstall = ''
# Remove the CMake-generated setup scripts not needed in a Nix env.
# (`rm -f` already exits 0 when the files are absent.)
rm -f "$out"/setup.sh "$out"/setup.csh
'';
meta = with lib; {
description = "AMD XRT (Xilinx Runtime) userspace library for XDNA NPUs";
longDescription = ''
XRT is the userspace component of AMD's XRT stack for their FPGA and
NPU devices. This package builds only the base library
(libxrt_coreutil, libxrt_core) that FastFlowLM links against to
communicate with the AMD XDNA 2 NPU via the amdxdna kernel driver.
The kernel driver (amdxdna.ko) is built in since Linux 6.14.
For older kernels it can be loaded via a DKMS package.
'';
homepage = "https://github.com/Xilinx/XRT";
license = licenses.asl20;
platforms = [ "x86_64-linux" ];
maintainers = [ ];
};
}