{
  lib,
  stdenv,
  fetchFromGitHub,
  # Build tooling
  cmake,
  ninja,
  pkg-config,
  rustPlatform,
  cargo,
  rustc,
  # C++ build-time dependencies
  boost,
  curl,
  openssl,
  fftw,
  fftwFloat, # fftw3f (single-precision)
  fftwLongDouble, # fftw3l (long-double-precision)
  ffmpeg,
  readline,
  libdrm,
  libuuid,
  # ELF patching for the bundled proprietary .so files
  autoPatchelfHook,
  patchelf,
  gcc-unwrapped,
  # Access to other flake packages (packages/xrt)
  pkgs,
  namespace,
}:

# FastFlowLM (FLM) — Ollama-style LLM runtime for AMD Ryzen AI (XDNA 2) NPUs.
#
# Build overview
# ==============
# The repository contains:
#   src/                  C++20 CMake project → produces the `flm` binary
#   third_party/
#     tokenizers-cpp/     git submodule — builds tokenizers_cpp (C++) +
#                         libtokenizers_c.a (Rust staticlib via cargo)
#   src/lib/*.so          Proprietary NPU kernel libraries (pre-built, bundled)
#   src/xclbins/          AIE bitstreams (pre-built, loaded at runtime by .so)
#   src/model_list.json   Model registry
#
# Runtime prerequisites (managed outside this package):
#   • Linux >= 6.14 with the amdxdna in-tree driver, or amdxdna-dkms on older
#     kernels
#   • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
#   • Memlock = unlimited for the FLM process
#   • packages/xrt (libxrt_coreutil) built and available
#
# To update to a new release
# ==========================
# 1. Bump `version` below.
# 2. Update the `hash` in `src`
#    (run: nix-prefetch-git --url ...FastFlowLM --rev v<X>).
# 3. If the tokenizers-cpp submodule rev changed (check .gitmodules / git
#    submodule status), update `tokenizersRev` and the `hash` in
#    `tokenizers-cpp-src`:
#      nix-prefetch-git --url .../tokenizers-cpp --rev <REV> --fetch-submodules
# 4. Update the `hash` in `cargoVendorDir`: set it to lib.fakeHash, run
#    nix build, and copy the correct hash from the error message.

let
  version = "0.9.36";

  # XRT userspace runtime — built from packages/xrt in this flake.
  xrt = pkgs.${namespace}.xrt;

  # ── tokenizers-cpp submodule ──────────────────────────────────────────────
  # Pinned to the commit referenced in FastFlowLM v0.9.36 .gitmodules.
  tokenizersRev = "34885cfd7b9ef27b859c28a41e71413dd31926f5";

  tokenizers-cpp-src = fetchFromGitHub {
    owner = "mlc-ai";
    repo = "tokenizers-cpp";
    rev = tokenizersRev;
    # Includes sentencepiece + msgpack sub-submodules.
    hash = "sha256-m3A9OhCXJgvvV9UbVL/ijaUC1zkLHlddnQLqZEA5t4w=";
    fetchSubmodules = true;
  };

  # Vendor the Rust crates from tokenizers-cpp/rust/Cargo.toml offline.
  # This fixed-output derivation has network access; everything else is sandboxed.
  # To compute the hash: set to lib.fakeHash → nix build → copy printed hash.
  cargoVendorDir = rustPlatform.fetchCargoVendor {
    src = tokenizers-cpp-src;
    sourceRoot = "source/rust";
    hash = lib.fakeHash; # FIXME: replace after first successful build attempt
  };
in
stdenv.mkDerivation rec {
  pname = "fastflowlm";
  inherit version;

  src = fetchFromGitHub {
    owner = "FastFlowLM";
    repo = "FastFlowLM";
    rev = "v${version}";
    # We do NOT fetch submodules here — tokenizers-cpp is injected separately
    # (above) so that its Rust deps can be vendored in a fixed-output derivation.
    hash = "sha256-uq/ZxvJA5HTJbMxofO4Hrz7ULvV1fPC7OHRXulMqwqw=";
  };

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    cargo
    rustc
    autoPatchelfHook
    patchelf
  ];

  buildInputs = [
    boost
    curl
    openssl
    fftw
    fftwFloat
    fftwLongDouble
    ffmpeg
    readline
    libdrm
    libuuid
    xrt
    # libstdc++ / libgcc_s needed at runtime by the bundled NPU .so files.
    gcc-unwrapped.lib
  ];

  # autoPatchelfHook uses runtimeDependencies to add NEEDED entries to the
  # ELF RPATH, covering libraries that the bundled .so files depend on.
  # lib.getLib selects each package's library output: several of these
  # (curl, openssl, ffmpeg, …) are multi-output derivations whose default
  # output does not contain the shared libraries, so listing the bare
  # package would add RPATH entries pointing at lib-less outputs.
  runtimeDependencies = map lib.getLib [
    xrt
    gcc-unwrapped.lib
    fftw
    fftwFloat
    fftwLongDouble
    ffmpeg
    curl
    openssl
    boost
    readline
    libdrm
  ];

  # CMakeLists.txt lives in src/, not the repo root.
  cmakeDir = "src";

  preConfigure = ''
    # ── 1. Populate the tokenizers-cpp submodule directory ───────────────────
    # CMakeLists.txt references the submodule as:
    #   add_subdirectory(''${CMAKE_SOURCE_DIR}/../third_party/tokenizers-cpp ...)
    # The cmake setup hook unpacks sources to $TMPDIR/source; we write the
    # submodule content there before cmake is invoked.
    mkdir -p third_party/tokenizers-cpp
    cp -r --no-preserve=mode,ownership "${tokenizers-cpp-src}/." \
      third_party/tokenizers-cpp/

    # ── 2. Configure cargo to use the pre-vendored crates (offline) ──────────
    mkdir -p third_party/tokenizers-cpp/rust/.cargo
    cat > third_party/tokenizers-cpp/rust/.cargo/config.toml << EOF
    [source.crates-io]
    replace-with = "vendored-sources"

    [source.vendored-sources]
    directory = "${cargoVendorDir}"
    EOF
  '';

  cmakeFlags = [
    # The build system requires these two version strings (checked at configure).
    "-DFLM_VERSION=${version}"
    "-DNPU_VERSION=32.0.203.311"
    "-DCMAKE_BUILD_TYPE=Release"
    # Override the default XRT install prefix (/opt/xilinx/xrt).
    "-DXRT_INCLUDE_DIR=${xrt}/include"
    "-DXRT_LIB_DIR=${xrt}/lib"
    # xclbins/ path baked into the binary via CMAKE_XCLBIN_PREFIX.
    "-DCMAKE_XCLBIN_PREFIX=${placeholder "out"}/share/flm"
  ];

  installPhase = ''
    runHook preInstall

    cmake --install . --prefix "$out"

    # ── Copy bundled proprietary NPU kernel .so files ─────────────────────────
    # The upstream CMakeLists installs them via:
    #   file(GLOB so_libs "''${CMAKE_SOURCE_DIR}/lib/*.so")
    #   install(FILES ''${so_libs} DESTINATION lib)
    # and sets RPATH=$ORIGIN/../lib on the flm binary.
    # We reproduce that layout: $out/lib/lib*.so alongside $out/bin/flm.
    mkdir -p "$out/lib"
    for so in "$src/src/lib"/lib*.so; do
      install -m755 "$so" "$out/lib/"
    done

    runHook postInstall
  '';

  # autoPatchelfHook runs automatically and patches the bundled .so files.
  # We additionally fix the RPATH on the flm binary to include both:
  #   • $out/lib (bundled NPU .so files)
  #   • system libs path (XRT, ffmpeg, boost, …)
  # NOTE(review): makeLibraryPath already applies lib.getLib to each entry,
  # so buildInputs is safe to use here as-is.
  postFixup = ''
    patchelf \
      --set-rpath "${lib.makeLibraryPath buildInputs}:$out/lib" \
      "$out/bin/flm"
  '';

  meta = {
    description = "LLM runtime for AMD Ryzen AI XDNA 2 NPUs";
    longDescription = ''
      FastFlowLM (FLM) runs large language models on AMD Ryzen AI (XDNA 2)
      NPU silicon — Strix Point, Strix Halo, Kraken Point, Gorgon Point.
      It provides an Ollama-compatible REST API (port 52625) and a CLI.

      Models are stored in ~/.config/flm/ by default;
      override with the FLM_MODEL_PATH environment variable.

      Usage:
        flm validate          # check NPU driver + firmware health
        flm run llama3.2:1b   # interactive chat (downloads model on first run)
        flm serve llama3.2:1b # OpenAI-compatible server on port 52625
        flm list              # list available models
        flm pull <model>      # pre-download a model

      System requirements:
        • Linux >= 6.14 (amdxdna in-tree) or amdxdna-dkms on older kernels
        • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
        • Unlimited memlock for the flm process, e.g. in NixOS:
            security.pam.loginLimits = [{
              domain = "*"; type = "-";
              item = "memlock"; value = "unlimited";
            }];

      License note: CLI/orchestration code is MIT. The bundled NPU kernel
      shared libraries are proprietary (free for commercial use up to
      USD 10 M annual revenue). See LICENSE_BINARY.txt upstream.
    '';
    homepage = "https://fastflowlm.com";
    license = with lib.licenses; [
      mit
      unfreeRedistributable
    ];
    mainProgram = "flm";
    platforms = [ "x86_64-linux" ];
    maintainers = [ ];
  };
}