init xrt and flm (FastFlowLM)

This commit is contained in:
mjallen18
2026-03-25 20:46:42 -05:00
parent 2013804b17
commit ab81e78b60
4 changed files with 404 additions and 28 deletions

View File

@@ -0,0 +1,248 @@
{
lib,
stdenv,
fetchFromGitHub,
cmake,
ninja,
pkg-config,
rustPlatform,
cargo,
rustc,
# C++ build-time dependencies
boost,
curl,
openssl,
fftw,
fftwFloat, # fftw3f (single-precision)
fftwLongDouble, # fftw3l (long-double-precision)
ffmpeg,
readline,
libdrm,
libuuid,
# ELF patching for the bundled proprietary .so files
autoPatchelfHook,
patchelf,
gcc-unwrapped,
# Access to other flake packages (packages/xrt)
pkgs,
namespace,
}:
# FastFlowLM (FLM) — Ollama-style LLM runtime for AMD Ryzen AI (XDNA 2) NPUs.
#
# Build overview
# ==============
# The repository contains:
# src/ C++20 CMake project → produces the `flm` binary
# third_party/
# tokenizers-cpp/ git submodule — builds tokenizers_cpp (C++) +
# libtokenizers_c.a (Rust staticlib via cargo)
# src/lib/*.so Proprietary NPU kernel libraries (pre-built, bundled)
# src/xclbins/ AIE bitstreams (pre-built, loaded at runtime by .so)
# src/model_list.json Model registry
#
# Runtime prerequisites (managed outside this package):
# • Linux >= 6.14 with amdxdna in-tree driver, or amdxdna-dkms on older
# kernels
# • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
# • Memlock = unlimited for the FLM process
# • packages/xrt (libxrt_coreutil) built and available
#
# To update to a new release
# ==========================
# 1. Bump `version` below.
# 2. Update `srcHash` (run: nix-prefetch-git --url ...FastFlowLM --rev v<X>).
# 3. If the tokenizers-cpp submodule rev changed (check .gitmodules / git
# submodule status), update `tokenizersRev` and `tokenizersHash`:
# nix-prefetch-git --url .../tokenizers-cpp --rev <REV> --fetch-submodules
# 4. Update `cargoVendorHash`: set to lib.fakeHash, run nix build, copy hash.
let
version = "0.9.36";
# XRT userspace runtime — built from packages/xrt in this flake.
xrt = pkgs.${namespace}.xrt;
# ── tokenizers-cpp submodule ──────────────────────────────────────────────
# Pinned to the commit referenced in FastFlowLM v0.9.36 .gitmodules.
tokenizersRev = "34885cfd7b9ef27b859c28a41e71413dd31926f5";
tokenizers-cpp-src = fetchFromGitHub {
owner = "mlc-ai";
repo = "tokenizers-cpp";
rev = tokenizersRev;
# Includes sentencepiece + msgpack sub-submodules.
hash = "sha256-m3A9OhCXJgvvV9UbVL/ijaUC1zkLHlddnQLqZEA5t4w=";
fetchSubmodules = true;
};
# Vendor the Rust crates from tokenizers-cpp/rust/Cargo.toml offline.
# This fixed-output derivation has network access; everything else is sandboxed.
# To compute the hash: set to lib.fakeHash → nix build → copy printed hash.
cargoVendorDir = rustPlatform.fetchCargoVendor {
src = tokenizers-cpp-src;
sourceRoot = "source/rust";
hash = lib.fakeHash; # FIXME: replace after first successful build attempt
};
in
# `rec` so attribute bodies below can reference `version` (src.rev, cmakeFlags).
stdenv.mkDerivation rec {
pname = "fastflowlm";
inherit version;
src = fetchFromGitHub {
owner = "FastFlowLM";
repo = "FastFlowLM";
rev = "v${version}";
# We do NOT fetch submodules here — tokenizers-cpp is injected separately
# (above) so that its Rust deps can be vendored in a fixed-output derivation.
hash = "sha256-uq/ZxvJA5HTJbMxofO4Hrz7ULvV1fPC7OHRXulMqwqw=";
};
nativeBuildInputs = [
cmake
ninja
pkg-config
cargo
rustc
autoPatchelfHook
patchelf
];
buildInputs = [
boost
curl
openssl
fftw
fftwFloat
fftwLongDouble
ffmpeg
readline
libdrm
libuuid
xrt
# libstdc++ / libgcc_s needed at runtime by the bundled NPU .so files.
gcc-unwrapped.lib
];
# autoPatchelfHook uses runtimeDependencies to add NEEDED entries to the
# ELF RPATH, covering libraries that the bundled .so files depend on.
# NOTE(review): entries are bare derivations; autoPatchelfHook resolves them
# via their lib output — confirm the pinned nixpkgs supports this form.
runtimeDependencies = [
xrt
gcc-unwrapped.lib
fftw
fftwFloat
fftwLongDouble
ffmpeg
curl
openssl
boost
readline
libdrm
];
# CMakeLists.txt lives in src/, not the repo root.
cmakeDir = "src";
# Runs before the cmake setup hook configures; working directory is the
# unpacked source root. `''${` below is a Nix escape for a literal `${`.
preConfigure = ''
# 1. Populate the tokenizers-cpp submodule directory
# CMakeLists.txt references the submodule as:
# add_subdirectory(''${CMAKE_SOURCE_DIR}/../third_party/tokenizers-cpp ...)
# The cmake setup hook unpacks sources to $TMPDIR/source; we write the
# submodule content there before cmake is invoked.
mkdir -p third_party/tokenizers-cpp
cp -r --no-preserve=mode,ownership "${tokenizers-cpp-src}/." \
third_party/tokenizers-cpp/
# 2. Configure cargo to use the pre-vendored crates (offline)
mkdir -p third_party/tokenizers-cpp/rust/.cargo
cat > third_party/tokenizers-cpp/rust/.cargo/config.toml << EOF
[source.crates-io]
replace-with = "vendored-sources"
[source.vendored-sources]
directory = "${cargoVendorDir}"
EOF
'';
cmakeFlags = [
# The build system requires these two version strings (checked at configure).
"-DFLM_VERSION=${version}"
"-DNPU_VERSION=32.0.203.311"
"-DCMAKE_BUILD_TYPE=Release"
# Override the default XRT install prefix (/opt/xilinx/xrt).
"-DXRT_INCLUDE_DIR=${xrt}/include"
"-DXRT_LIB_DIR=${xrt}/lib"
# xclbins/ path baked into the binary via CMAKE_XCLBIN_PREFIX.
"-DCMAKE_XCLBIN_PREFIX=${placeholder "out"}/share/flm"
];
# Custom installPhase: `cmake --install` for the CMake-managed artifacts,
# plus a manual copy of the pre-built proprietary NPU kernel libraries
# straight from the (read-only) fetched source tree.
installPhase = ''
runHook preInstall
cmake --install . --prefix "$out"
# Copy bundled proprietary NPU kernel .so files
# The upstream CMakeLists installs them via:
# file(GLOB so_libs "''${CMAKE_SOURCE_DIR}/lib/*.so")
# install(FILES ''${so_libs} DESTINATION lib)
# and sets RPATH=$ORIGIN/../lib on the flm binary.
# We reproduce that layout: $out/lib/lib*.so alongside $out/bin/flm.
mkdir -p "$out/lib"
for so in "$src/src/lib"/lib*.so; do
install -m755 "$so" "$out/lib/"
done
runHook postInstall
'';
# autoPatchelfHook runs automatically and patches the bundled .so files.
# We additionally fix the RPATH on the flm binary to include both:
# • $out/lib (bundled NPU .so files)
# • system libs path (XRT, ffmpeg, boost, …)
postFixup = ''
patchelf \
--set-rpath "${lib.makeLibraryPath buildInputs}:$out/lib" \
"$out/bin/flm"
'';
meta = with lib; {
description = "LLM runtime for AMD Ryzen AI XDNA 2 NPUs";
longDescription = ''
FastFlowLM (FLM) runs large language models on AMD Ryzen AI (XDNA 2)
NPU silicon Strix Point, Strix Halo, Kraken Point, Gorgon Point.
It provides an Ollama-compatible REST API (port 52625) and a CLI.
Models are stored in ~/.config/flm/ by default;
override with the FLM_MODEL_PATH environment variable.
Usage:
flm validate # check NPU driver + firmware health
flm run llama3.2:1b # interactive chat (downloads model on first run)
flm serve llama3.2:1b # OpenAI-compatible server on port 52625
flm list # list available models
flm pull <model> # pre-download a model
System requirements:
Linux >= 6.14 (amdxdna in-tree) or amdxdna-dkms on older kernels
linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
Unlimited memlock for the flm process, e.g. in NixOS:
security.pam.loginLimits = [{
domain = "*"; type = "-";
item = "memlock"; value = "unlimited";
}];
License note: CLI/orchestration code is MIT. The bundled NPU kernel
shared libraries are proprietary (free for commercial use up to
USD 10 M annual revenue). See LICENSE_BINARY.txt upstream.
'';
homepage = "https://fastflowlm.com";
# Dual-listed: MIT for the CLI sources, unfree-redistributable for the
# bundled NPU kernel .so files (see longDescription).
license = with licenses; [
mit
unfreeRedistributable
];
mainProgram = "flm";
platforms = [ "x86_64-linux" ];
maintainers = [ ];
};
}

114
packages/xrt/default.nix Normal file
View File

@@ -0,0 +1,114 @@
{
lib,
stdenv,
fetchFromGitHub,
cmake,
ninja,
pkg-config,
python3,
boost,
curl,
openssl,
systemd,
libdrm,
ncurses,
protobuf,
elfutils,
zlib,
rapidjson,
util-linux, # provides libuuid
xz, # provides liblzma
}:
# AMD XRT (Xilinx Runtime) userspace library for NPU (XDNA 2) devices.
#
# This package builds the XRT base library from the commit pinned as a
# submodule in amd/xdna-driver. It provides:
# $out/lib/libxrt_coreutil.so — core utility library (linked by flm)
# $out/lib/libxrt_core.so — platform-independent core
# $out/include/xrt/ — public C++ headers
# $out/include/experimental/
#
# The xrt source tree lives under the src/ subdirectory of the Xilinx/XRT
# repository (see src/CMakeLists.txt which includes CMake/nativeLnx.cmake).
#
# XRT version 2.19.0 — pinned to the commit used by amd/xdna-driver main
# as of 2026-03-25 (xrt @ 481583d).
#
# Runtime note: this package only provides the userspace library. The
# kernel driver (amdxdna.ko) is a separate concern:
# • Linux >= 6.14 ships it in-tree (boot.kernelPackages.linux_latest).
# • Older kernels can use hardware.amdxdna.enable (once packaged).
#
# No `rec`: no attribute below references pname/version, so a plain set
# keeps evaluation simpler and avoids accidental self-reference.
stdenv.mkDerivation {
pname = "xrt";
version = "2.19.0";
src = fetchFromGitHub {
owner = "Xilinx";
repo = "XRT";
# Commit pinned by amd/xdna-driver (see header comment); update the
# version string above in lockstep when bumping this rev.
rev = "481583db9a26cb506a37cab7f1881ae7c7de2f32";
hash = "sha256-WLZDjuuEGd3i77zXpAJkfQy/AszdSQ9pagy64yGX58Q=";
fetchSubmodules = false; # XRT submodules are Windows-only tools
};
nativeBuildInputs = [
cmake
ninja
pkg-config
python3
];
buildInputs = [
boost
curl
openssl
systemd # for libudev (device enumeration)
libdrm
ncurses
protobuf
elfutils # libelf
zlib
rapidjson
util-linux # libuuid
xz # liblzma
];
# XRT's CMakeLists.txt is in the src/ subdirectory.
cmakeDir = "src";
# Note: the nixpkgs cmake setup hook already passes
# -DCMAKE_INSTALL_PREFIX=$out, so it is not repeated here.
cmakeFlags = [
"-DCMAKE_BUILD_TYPE=Release"
# Build the NPU/XDNA variant (skips PCIe FPGA-specific components).
"-DXRT_NATIVE_BUILD=yes"
# Disable components we do not need:
"-DXRT_ENABLE_WERROR=OFF"
# Install libraries to lib/ (some builds default to lib64/).
"-DCMAKE_INSTALL_LIBDIR=lib"
];
# XRT's install target places a setup.sh in the prefix root; we don't need
# that for Nix — the binary wrapper / RPATH mechanism handles library lookup.
postInstall = ''
# Remove the CMake-generated setup scripts not needed in a Nix env.
# (`rm -f` already exits 0 when the files are absent.)
rm -f "$out"/setup.sh "$out"/setup.csh
'';
meta = with lib; {
description = "AMD XRT (Xilinx Runtime) userspace library for XDNA NPUs";
longDescription = ''
XRT is the userspace component of AMD's XRT stack for their FPGA and
NPU devices. This package builds only the base library
(libxrt_coreutil, libxrt_core) that FastFlowLM links against to
communicate with the AMD XDNA 2 NPU via the amdxdna kernel driver.
The kernel driver (amdxdna.ko) is built in since Linux 6.14.
For older kernels it can be loaded via a DKMS package.
'';
homepage = "https://github.com/Xilinx/XRT";
license = licenses.asl20;
platforms = [ "x86_64-linux" ];
maintainers = [ ];
};
}