{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
  ninja,
  pkg-config,
  rustPlatform,
  cargo,
  rustc,
  # C++ build-time dependencies
  boost,
  curl,
  openssl,
  fftw,
  fftwFloat, # fftw3f (single-precision)
  fftwLongDouble, # fftw3l (long-double-precision)
  ffmpeg,
  readline,
  libdrm,
  libuuid,
  # ELF patching for the bundled proprietary .so files
  autoPatchelfHook,
  patchelf,
  gcc-unwrapped,
  # Access to other flake packages (packages/xrt)
  pkgs,
  namespace,
}:

# FastFlowLM (FLM) — Ollama-style LLM runtime for AMD Ryzen AI (XDNA 2) NPUs.
#
# Build overview
# ==============
# The repository contains:
#   src/                  C++20 CMake project → produces the `flm` binary
#   third_party/
#     tokenizers-cpp/     git submodule — builds tokenizers_cpp (C++) +
#                         libtokenizers_c.a (Rust staticlib via cargo)
#   src/lib/*.so          Proprietary NPU kernel libraries (pre-built, bundled)
#   src/xclbins/          AIE bitstreams (pre-built, loaded at runtime by .so)
#   src/model_list.json   Model registry
#
# Runtime prerequisites (managed outside this package):
#   • Linux >= 6.14 with amdxdna in-tree driver, or amdxdna-dkms on older
#     kernels
#   • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
#   • Memlock = unlimited for the FLM process
#   • packages/xrt (libxrt_coreutil) built and available
#
# To update to a new release
# ==========================
# 1. Bump `version` below.
# 2. Update `srcHash` (run: nix-prefetch-git --url ...FastFlowLM --rev v<version>).
# 3. If the tokenizers-cpp submodule rev changed (check .gitmodules / git
#    submodule status), update `tokenizersRev` and `tokenizersHash`:
#      nix-prefetch-git --url .../tokenizers-cpp --rev <tokenizersRev> --fetch-submodules
# 4. Update `cargoVendorHash`: set to lib.fakeHash, run nix build, copy hash.

let
  version = "0.9.36";

  # XRT userspace runtime — built from packages/xrt in this flake.
  xrt = pkgs.${namespace}.xrt;

  # ── tokenizers-cpp submodule ──────────────────────────────────────────────
  # Pinned to the commit referenced in FastFlowLM v0.9.36 .gitmodules.
  tokenizersRev = "34885cfd7b9ef27b859c28a41e71413dd31926f5";

  tokenizers-cpp-src = fetchFromGitHub {
    owner = "mlc-ai";
    repo = "tokenizers-cpp";
    rev = tokenizersRev;
    # Includes sentencepiece + msgpack sub-submodules.
    hash = "sha256-m3A9OhCXJgvvV9UbVL/ijaUC1zkLHlddnQLqZEA5t4w=";
    fetchSubmodules = true;
  };

  # Vendor the Rust crates from tokenizers-cpp/rust/Cargo.toml offline.
  # This fixed-output derivation has network access; everything else is sandboxed.
  # To compute the hash: set to lib.fakeHash → nix build → copy printed hash.
  cargoVendorDir = rustPlatform.fetchCargoVendor {
    src = tokenizers-cpp-src;
    sourceRoot = "source/rust";
    hash = lib.fakeHash; # FIXME: replace after first successful build attempt
  };
in
stdenv.mkDerivation rec {
  pname = "fastflowlm";
  inherit version;

  src = fetchFromGitHub {
    owner = "FastFlowLM";
    repo = "FastFlowLM";
    rev = "v${version}";
    # We do NOT fetch submodules here — tokenizers-cpp is injected separately
    # (above) so that its Rust deps can be vendored in a fixed-output derivation.
    hash = "sha256-uq/ZxvJA5HTJbMxofO4Hrz7ULvV1fPC7OHRXulMqwqw=";
  };

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    cargo
    rustc
    autoPatchelfHook
    patchelf
  ];

  buildInputs = [
    boost
    curl
    openssl
    fftw
    fftwFloat
    fftwLongDouble
    ffmpeg
    readline
    libdrm
    libuuid
    xrt
    # libstdc++ / libgcc_s needed at runtime by the bundled NPU .so files.
    gcc-unwrapped.lib
  ];

  # autoPatchelfHook uses runtimeDependencies to add NEEDED entries to the
  # ELF RPATH, covering libraries that the bundled .so files depend on.
  runtimeDependencies = [
    xrt
    gcc-unwrapped.lib
    fftw
    fftwFloat
    fftwLongDouble
    ffmpeg
    curl
    openssl
    boost
    readline
    libdrm
  ];

  # CMakeLists.txt lives in src/, not the repo root.
  cmakeDir = "src";

  preConfigure = ''
    # ── 1. Populate the tokenizers-cpp submodule directory ───────────────────
    # CMakeLists.txt references the submodule as:
    #   add_subdirectory(''${CMAKE_SOURCE_DIR}/../third_party/tokenizers-cpp ...)
    # The cmake setup hook unpacks sources to $TMPDIR/source; we write the
    # submodule content there before cmake is invoked.
    mkdir -p third_party/tokenizers-cpp
    cp -r --no-preserve=mode,ownership "${tokenizers-cpp-src}/." \
      third_party/tokenizers-cpp/

    # ── 2. Configure cargo to use the pre-vendored crates (offline) ──────────
    mkdir -p third_party/tokenizers-cpp/rust/.cargo
    cat > third_party/tokenizers-cpp/rust/.cargo/config.toml << EOF
    [source.crates-io]
    replace-with = "vendored-sources"

    [source.vendored-sources]
    directory = "${cargoVendorDir}"
    EOF
  '';

  cmakeFlags = [
    # The build system requires these two version strings (checked at configure).
    "-DFLM_VERSION=${version}"
    "-DNPU_VERSION=32.0.203.311"
    "-DCMAKE_BUILD_TYPE=Release"
    # Override the default XRT install prefix (/opt/xilinx/xrt).
    "-DXRT_INCLUDE_DIR=${xrt}/include"
    "-DXRT_LIB_DIR=${xrt}/lib"
    # xclbins/ path baked into the binary via CMAKE_XCLBIN_PREFIX.
    "-DCMAKE_XCLBIN_PREFIX=${placeholder "out"}/share/flm"
  ];

  installPhase = ''
    runHook preInstall

    cmake --install . --prefix "$out"

    # ── Copy bundled proprietary NPU kernel .so files ─────────────────────────
    # The upstream CMakeLists installs them via:
    #   file(GLOB so_libs "''${CMAKE_SOURCE_DIR}/lib/*.so")
    #   install(FILES ''${so_libs} DESTINATION lib)
    # and sets RPATH=$ORIGIN/../lib on the flm binary.
    # We reproduce that layout: $out/lib/lib*.so alongside $out/bin/flm.
    #
    # Guard against an upstream layout change: with the plain glob an empty
    # match would pass the literal pattern to `install` and fail confusingly,
    # or (with nullglob alone) silently install nothing.
    mkdir -p "$out/lib"
    shopt -s nullglob
    npuLibs=("$src/src/lib"/lib*.so)
    shopt -u nullglob
    if (( ''${#npuLibs[@]} == 0 )); then
      echo "error: no bundled NPU kernel libraries found under $src/src/lib" >&2
      echo "       (did the upstream repository layout change?)" >&2
      exit 1
    fi
    install -m755 -t "$out/lib" "''${npuLibs[@]}"

    runHook postInstall
  '';

  # autoPatchelfHook runs automatically and patches the bundled .so files.
  # We additionally fix the RPATH on the flm binary to include both:
  #   • $out/lib (bundled NPU .so files)
  #   • system libs path (XRT, ffmpeg, boost, …)
  postFixup = ''
    patchelf \
      --set-rpath "${lib.makeLibraryPath buildInputs}:$out/lib" \
      "$out/bin/flm"
  '';

  meta = with lib; {
    description = "LLM runtime for AMD Ryzen AI XDNA 2 NPUs";
    longDescription = ''
      FastFlowLM (FLM) runs large language models on AMD Ryzen AI (XDNA 2)
      NPU silicon — Strix Point, Strix Halo, Kraken Point, Gorgon Point.
      It provides an Ollama-compatible REST API (port 52625) and a CLI.

      Models are stored in ~/.config/flm/ by default; override with the
      FLM_MODEL_PATH environment variable.

      Usage:
        flm validate          # check NPU driver + firmware health
        flm run llama3.2:1b   # interactive chat (downloads model on first run)
        flm serve llama3.2:1b # OpenAI-compatible server on port 52625
        flm list              # list available models
        flm pull              # pre-download a model

      System requirements:
        • Linux >= 6.14 (amdxdna in-tree) or amdxdna-dkms on older kernels
        • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
        • Unlimited memlock for the flm process, e.g. in NixOS:
            security.pam.loginLimits = [{
              domain = "*"; type = "-"; item = "memlock"; value = "unlimited";
            }];

      License note: CLI/orchestration code is MIT. The bundled NPU kernel
      shared libraries are proprietary (free for commercial use up to
      USD 10 M annual revenue). See LICENSE_BINARY.txt upstream.
    '';
    homepage = "https://fastflowlm.com";
    license = with licenses; [ mit unfreeRedistributable ];
    # The CLI is built from source, but the NPU kernel .so files (and xclbins)
    # are pre-built native blobs shipped in the source tree.
    sourceProvenance = with sourceTypes; [
      fromSource
      binaryNativeCode
    ];
    mainProgram = "flm";
    platforms = [ "x86_64-linux" ];
    maintainers = [ ];
  };
}