init xrt and fflm
This commit is contained in:
@@ -400,34 +400,30 @@ let
|
||||
# ntfy via the Grafana webhook contact point. Grafana POSTs a JSON
|
||||
# body; ntfy accepts any body as the message text. We use the
|
||||
# message template below to format it nicely.
|
||||
# Basic auth credentials are read from the SOPS secret at runtime
|
||||
# via Grafana's $__file{} provider.
|
||||
contactPoints.settings = {
|
||||
apiVersion = 1;
|
||||
contactPoints = [
|
||||
{
|
||||
name = "ntfy";
|
||||
receivers = [
|
||||
{
|
||||
uid = "ntfy-webhook";
|
||||
type = "webhook";
|
||||
settings = {
|
||||
url = "https://ntfy.mjallen.dev/grafana-alerts";
|
||||
httpMethod = "POST";
|
||||
username = "$__file{${config.sops.secrets."jallen-nas/ntfy/user".path}}";
|
||||
password = "$__file{${config.sops.secrets."jallen-nas/ntfy/password".path}}";
|
||||
# Pass alert title and state as ntfy headers via the
|
||||
# custom message template (defined below).
|
||||
httpHeaders = {
|
||||
"Tags" = "chart,bell";
|
||||
};
|
||||
};
|
||||
disableResolveMessage = false;
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
#
|
||||
# Credentials are injected via Grafana's $__env{} provider, which
|
||||
# reads from the process environment. The GRAFANA_NTFY_USER and
|
||||
# GRAFANA_NTFY_PASSWORD variables are set via the SOPS-managed
|
||||
# grafana.env EnvironmentFile on the grafana.service unit.
|
||||
#
|
||||
# Note: $__file{} only works in grafana.ini settings, not in
|
||||
# provisioning YAML files — using it here causes a parse error.
|
||||
contactPoints.path = pkgs.writeTextDir "contactPoints.yaml" ''
|
||||
apiVersion: 1
|
||||
contactPoints:
|
||||
- name: ntfy
|
||||
receivers:
|
||||
- uid: ntfy-webhook
|
||||
type: webhook
|
||||
disableResolveMessage: false
|
||||
settings:
|
||||
url: https://ntfy.mjallen.dev/grafana-alerts
|
||||
httpMethod: POST
|
||||
username: $__env{GRAFANA_NTFY_USER}
|
||||
password: $__env{GRAFANA_NTFY_PASSWORD}
|
||||
httpHeaders:
|
||||
Tags: "chart,bell"
|
||||
'';
|
||||
|
||||
# ── Notification message template ───────────────────────────────────
|
||||
# Grafana sends the rendered template body as the POST body.
|
||||
@@ -878,6 +874,11 @@ let
|
||||
};
|
||||
};
|
||||
|
||||
# Inject ntfy credentials into Grafana's environment so the $__env{}
|
||||
# provider in contactPoints.yaml can resolve them at runtime.
|
||||
# The grafana.env template is managed by SOPS and owned by grafana:grafana.
|
||||
systemd.services.grafana.serviceConfig.EnvironmentFile = config.sops.templates."grafana.env".path;
|
||||
|
||||
# The redis exporter needs AF_INET to reach TCP Redis instances.
|
||||
# The default systemd hardening only allows AF_UNIX.
|
||||
systemd.services.prometheus-redis-exporter.serviceConfig.RestrictAddressFamilies = [
|
||||
|
||||
248
packages/fastflowlm/default.nix
Normal file
248
packages/fastflowlm/default.nix
Normal file
@@ -0,0 +1,248 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
cmake,
|
||||
ninja,
|
||||
pkg-config,
|
||||
rustPlatform,
|
||||
cargo,
|
||||
rustc,
|
||||
# C++ build-time dependencies
|
||||
boost,
|
||||
curl,
|
||||
openssl,
|
||||
fftw,
|
||||
fftwFloat, # fftw3f (single-precision)
|
||||
fftwLongDouble, # fftw3l (long-double-precision)
|
||||
ffmpeg,
|
||||
readline,
|
||||
libdrm,
|
||||
libuuid,
|
||||
# ELF patching for the bundled proprietary .so files
|
||||
autoPatchelfHook,
|
||||
patchelf,
|
||||
gcc-unwrapped,
|
||||
# Access to other flake packages (packages/xrt)
|
||||
pkgs,
|
||||
namespace,
|
||||
}:
|
||||
|
||||
# FastFlowLM (FLM) — Ollama-style LLM runtime for AMD Ryzen AI (XDNA 2) NPUs.
|
||||
#
|
||||
# Build overview
|
||||
# ==============
|
||||
# The repository contains:
|
||||
# src/ C++20 CMake project → produces the `flm` binary
|
||||
# third_party/
|
||||
# tokenizers-cpp/ git submodule — builds tokenizers_cpp (C++) +
|
||||
# libtokenizers_c.a (Rust staticlib via cargo)
|
||||
# src/lib/*.so Proprietary NPU kernel libraries (pre-built, bundled)
|
||||
# src/xclbins/ AIE bitstreams (pre-built, loaded at runtime by .so)
|
||||
# src/model_list.json Model registry
|
||||
#
|
||||
# Runtime prerequisites (managed outside this package):
|
||||
# • Linux >= 6.14 with amdxdna in-tree driver, or amdxdna-dkms on older
|
||||
# kernels
|
||||
# • linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
|
||||
# • Memlock = unlimited for the FLM process
|
||||
# • packages/xrt (libxrt_coreutil) built and available
|
||||
#
|
||||
# To update to a new release
|
||||
# ==========================
|
||||
# 1. Bump `version` below.
|
||||
# 2. Update `src.hash` (run: nix-prefetch-git --url ...FastFlowLM --rev v<X>).
|
||||
# 3. If the tokenizers-cpp submodule rev changed (check .gitmodules / git
|
||||
# submodule status), update `tokenizersRev` and `tokenizers-cpp-src.hash`:
|
||||
# nix-prefetch-git --url .../tokenizers-cpp --rev <REV> --fetch-submodules
|
||||
# 4. Update `cargoVendorDir.hash`: set to lib.fakeHash, run nix build, copy hash.
|
||||
|
||||
let
|
||||
version = "0.9.36";
|
||||
|
||||
# XRT userspace runtime — built from packages/xrt in this flake.
|
||||
xrt = pkgs.${namespace}.xrt;
|
||||
|
||||
# ── tokenizers-cpp submodule ──────────────────────────────────────────────
|
||||
# Pinned to the commit referenced in FastFlowLM v0.9.36 .gitmodules.
|
||||
tokenizersRev = "34885cfd7b9ef27b859c28a41e71413dd31926f5";
|
||||
|
||||
tokenizers-cpp-src = fetchFromGitHub {
|
||||
owner = "mlc-ai";
|
||||
repo = "tokenizers-cpp";
|
||||
rev = tokenizersRev;
|
||||
# Includes sentencepiece + msgpack sub-submodules.
|
||||
hash = "sha256-m3A9OhCXJgvvV9UbVL/ijaUC1zkLHlddnQLqZEA5t4w=";
|
||||
fetchSubmodules = true;
|
||||
};
|
||||
|
||||
# Vendor the Rust crates needed by tokenizers-cpp/rust so the build can run offline.
|
||||
# This fixed-output derivation has network access; everything else is sandboxed.
|
||||
# To compute the hash: set to lib.fakeHash → nix build → copy printed hash.
|
||||
cargoVendorDir = rustPlatform.fetchCargoVendor {
|
||||
src = tokenizers-cpp-src;
|
||||
sourceRoot = "source/rust";
|
||||
hash = lib.fakeHash; # FIXME: replace after first successful build attempt
|
||||
};
|
||||
|
||||
in
|
||||
stdenv.mkDerivation rec {
|
||||
pname = "fastflowlm";
|
||||
inherit version;
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "FastFlowLM";
|
||||
repo = "FastFlowLM";
|
||||
rev = "v${version}";
|
||||
# We do NOT fetch submodules here — tokenizers-cpp is injected separately
|
||||
# (above) so that its Rust deps can be vendored in a fixed-output derivation.
|
||||
hash = "sha256-uq/ZxvJA5HTJbMxofO4Hrz7ULvV1fPC7OHRXulMqwqw=";
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
ninja
|
||||
pkg-config
|
||||
cargo
|
||||
rustc
|
||||
autoPatchelfHook
|
||||
patchelf
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
boost
|
||||
curl
|
||||
openssl
|
||||
fftw
|
||||
fftwFloat
|
||||
fftwLongDouble
|
||||
ffmpeg
|
||||
readline
|
||||
libdrm
|
||||
libuuid
|
||||
xrt
|
||||
# libstdc++ / libgcc_s needed at runtime by the bundled NPU .so files.
|
||||
gcc-unwrapped.lib
|
||||
];
|
||||
|
||||
# autoPatchelfHook adds the library dirs of runtimeDependencies to the
|
||||
# ELF RPATH, covering libraries that the bundled .so files depend on.
|
||||
runtimeDependencies = [
|
||||
xrt
|
||||
gcc-unwrapped.lib
|
||||
fftw
|
||||
fftwFloat
|
||||
fftwLongDouble
|
||||
ffmpeg
|
||||
curl
|
||||
openssl
|
||||
boost
|
||||
readline
|
||||
libdrm
|
||||
];
|
||||
|
||||
# CMakeLists.txt lives in src/, not the repo root.
|
||||
cmakeDir = "src";
|
||||
|
||||
preConfigure = ''
|
||||
# ── 1. Populate the tokenizers-cpp submodule directory ───────────────────
|
||||
# CMakeLists.txt references the submodule as:
|
||||
# add_subdirectory(''${CMAKE_SOURCE_DIR}/../third_party/tokenizers-cpp ...)
|
||||
# preConfigure runs from the unpacked source root; we write the
|
||||
# submodule content there before cmake is invoked.
|
||||
mkdir -p third_party/tokenizers-cpp
|
||||
cp -r --no-preserve=mode,ownership "${tokenizers-cpp-src}/." \
|
||||
third_party/tokenizers-cpp/
|
||||
|
||||
# ── 2. Configure cargo to use the pre-vendored crates (offline) ──────────
|
||||
mkdir -p third_party/tokenizers-cpp/rust/.cargo
|
||||
cat > third_party/tokenizers-cpp/rust/.cargo/config.toml << EOF
|
||||
[source.crates-io]
|
||||
replace-with = "vendored-sources"
|
||||
|
||||
[source.vendored-sources]
|
||||
directory = "${cargoVendorDir}"
|
||||
EOF
|
||||
'';
|
||||
|
||||
cmakeFlags = [
|
||||
# The build system requires these two version strings (checked at configure).
|
||||
"-DFLM_VERSION=${version}"
|
||||
"-DNPU_VERSION=32.0.203.311"
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
# Override the default XRT install prefix (/opt/xilinx/xrt).
|
||||
"-DXRT_INCLUDE_DIR=${xrt}/include"
|
||||
"-DXRT_LIB_DIR=${xrt}/lib"
|
||||
# xclbins/ path baked into the binary via CMAKE_XCLBIN_PREFIX.
|
||||
"-DCMAKE_XCLBIN_PREFIX=${placeholder "out"}/share/flm"
|
||||
];
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
|
||||
cmake --install . --prefix "$out"
|
||||
|
||||
# ── Copy bundled proprietary NPU kernel .so files ─────────────────────────
|
||||
# The upstream CMakeLists installs them via:
|
||||
# file(GLOB so_libs "''${CMAKE_SOURCE_DIR}/lib/*.so")
|
||||
# install(FILES ''${so_libs} DESTINATION lib)
|
||||
# and sets RPATH=$ORIGIN/../lib on the flm binary.
|
||||
# We reproduce that layout: $out/lib/lib*.so alongside $out/bin/flm.
|
||||
mkdir -p "$out/lib"
|
||||
for so in "$src/src/lib"/lib*.so; do
|
||||
install -m755 "$so" "$out/lib/"
|
||||
done
|
||||
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
# autoPatchelfHook runs automatically and patches the bundled .so files.
|
||||
# We additionally fix the RPATH on the flm binary to include both:
|
||||
# • $out/lib (bundled NPU .so files)
|
||||
# • system libs path (XRT, ffmpeg, boost, …)
|
||||
postFixup = ''
|
||||
patchelf \
|
||||
--set-rpath "${lib.makeLibraryPath buildInputs}:$out/lib" \
|
||||
"$out/bin/flm"
|
||||
'';
|
||||
|
||||
meta = with lib; {
|
||||
description = "LLM runtime for AMD Ryzen AI XDNA 2 NPUs";
|
||||
longDescription = ''
|
||||
FastFlowLM (FLM) runs large language models on AMD Ryzen AI (XDNA 2)
|
||||
NPU silicon — Strix Point, Strix Halo, Kraken Point, Gorgon Point.
|
||||
It provides an Ollama-compatible REST API (port 52625) and a CLI.
|
||||
|
||||
Models are stored in ~/.config/flm/ by default;
|
||||
override with the FLM_MODEL_PATH environment variable.
|
||||
|
||||
Usage:
|
||||
flm validate # check NPU driver + firmware health
|
||||
flm run llama3.2:1b # interactive chat (downloads model on first run)
|
||||
flm serve llama3.2:1b # OpenAI-compatible server on port 52625
|
||||
flm list # list available models
|
||||
flm pull <model> # pre-download a model
|
||||
|
||||
System requirements:
|
||||
• Linux >= 6.14 (amdxdna in-tree) or amdxdna-dkms on older kernels
|
||||
• linux-firmware >= 20260221 (NPU firmware >= 1.1.0.0)
|
||||
• Unlimited memlock for the flm process, e.g. in NixOS:
|
||||
security.pam.loginLimits = [{
|
||||
domain = "*"; type = "-";
|
||||
item = "memlock"; value = "unlimited";
|
||||
}];
|
||||
|
||||
License note: CLI/orchestration code is MIT. The bundled NPU kernel
|
||||
shared libraries are proprietary (free for commercial use up to
|
||||
USD 10 M annual revenue). See LICENSE_BINARY.txt upstream.
|
||||
'';
|
||||
homepage = "https://fastflowlm.com";
|
||||
license = with licenses; [
|
||||
mit
|
||||
unfreeRedistributable
|
||||
];
|
||||
mainProgram = "flm";
|
||||
platforms = [ "x86_64-linux" ];
|
||||
maintainers = [ ];
|
||||
};
|
||||
}
|
||||
114
packages/xrt/default.nix
Normal file
114
packages/xrt/default.nix
Normal file
@@ -0,0 +1,114 @@
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
fetchFromGitHub,
|
||||
cmake,
|
||||
ninja,
|
||||
pkg-config,
|
||||
python3,
|
||||
boost,
|
||||
curl,
|
||||
openssl,
|
||||
systemd,
|
||||
libdrm,
|
||||
ncurses,
|
||||
protobuf,
|
||||
elfutils,
|
||||
zlib,
|
||||
rapidjson,
|
||||
util-linux, # provides libuuid
|
||||
xz, # provides liblzma
|
||||
}:
|
||||
|
||||
# AMD XRT (Xilinx Runtime) userspace library for NPU (XDNA 2) devices.
|
||||
#
|
||||
# This package builds the XRT base library from the commit pinned as a
|
||||
# submodule in amd/xdna-driver. It provides:
|
||||
# $out/lib/libxrt_coreutil.so — core utility library (linked by flm)
|
||||
# $out/lib/libxrt_core.so — platform-independent core
|
||||
# $out/include/xrt/ — public C++ headers
|
||||
# $out/include/experimental/
|
||||
#
|
||||
# The xrt source tree lives under the src/ subdirectory of the Xilinx/XRT
|
||||
# repository (see src/CMakeLists.txt which includes CMake/nativeLnx.cmake).
|
||||
#
|
||||
# XRT version 2.19.0 — pinned to the commit used by amd/xdna-driver main
|
||||
# as of 2026-03-25 (xrt @ 481583d).
|
||||
#
|
||||
# Runtime note: this package only provides the userspace library. The
|
||||
# kernel driver (amdxdna.ko) is a separate concern:
|
||||
# • Linux >= 6.14 ships it in-tree (boot.kernelPackages.linux_latest).
|
||||
# • Older kernels can use hardware.amdxdna.enable (once packaged).
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
pname = "xrt";
|
||||
version = "2.19.0";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
owner = "Xilinx";
|
||||
repo = "XRT";
|
||||
rev = "481583db9a26cb506a37cab7f1881ae7c7de2f32";
|
||||
hash = "sha256-WLZDjuuEGd3i77zXpAJkfQy/AszdSQ9pagy64yGX58Q=";
|
||||
fetchSubmodules = false; # XRT submodules are Windows-only tools
|
||||
};
|
||||
|
||||
nativeBuildInputs = [
|
||||
cmake
|
||||
ninja
|
||||
pkg-config
|
||||
python3
|
||||
];
|
||||
|
||||
buildInputs = [
|
||||
boost
|
||||
curl
|
||||
openssl
|
||||
systemd # for libudev (device enumeration)
|
||||
libdrm
|
||||
ncurses
|
||||
protobuf
|
||||
elfutils # libelf
|
||||
zlib
|
||||
rapidjson
|
||||
util-linux # libuuid
|
||||
xz # liblzma
|
||||
];
|
||||
|
||||
# XRT's CMakeLists.txt is in the src/ subdirectory.
|
||||
cmakeDir = "src";
|
||||
|
||||
cmakeFlags = [
|
||||
"-DCMAKE_BUILD_TYPE=Release"
|
||||
"-DCMAKE_INSTALL_PREFIX=${placeholder "out"}"
|
||||
# Build the NPU/XDNA variant (skips PCIe FPGA-specific components).
|
||||
"-DXRT_NATIVE_BUILD=yes"
|
||||
# Do not treat compiler warnings as errors:
|
||||
"-DXRT_ENABLE_WERROR=OFF"
|
||||
# Install libraries to lib/ (some builds default to lib64/).
|
||||
"-DCMAKE_INSTALL_LIBDIR=lib"
|
||||
];
|
||||
|
||||
# XRT's install target places a setup.sh in the prefix root; we don't need
|
||||
# that for Nix — the binary wrapper / RPATH mechanism handles library lookup.
|
||||
postInstall = ''
|
||||
# Remove the CMake-generated setup.sh and setup.csh — not needed in a Nix env.
|
||||
rm -f "$out"/setup.sh "$out"/setup.csh 2>/dev/null || true
|
||||
'';
|
||||
|
||||
meta = with lib; {
|
||||
description = "AMD XRT (Xilinx Runtime) userspace library for XDNA NPUs";
|
||||
longDescription = ''
|
||||
XRT is the userspace component of AMD's XRT stack for their FPGA and
|
||||
NPU devices. This package builds only the base library
|
||||
(libxrt_coreutil, libxrt_core) that FastFlowLM links against to
|
||||
communicate with the AMD XDNA 2 NPU via the amdxdna kernel driver.
|
||||
|
||||
The kernel driver (amdxdna.ko) is built in since Linux 6.14.
|
||||
For older kernels it can be loaded via a DKMS package.
|
||||
'';
|
||||
homepage = "https://github.com/Xilinx/XRT";
|
||||
license = licenses.asl20;
|
||||
platforms = [ "x86_64-linux" ];
|
||||
maintainers = [ ];
|
||||
};
|
||||
}
|
||||
@@ -366,6 +366,19 @@ in
|
||||
];
|
||||
};
|
||||
|
||||
# Grafana reads ntfy credentials via systemd EnvironmentFile so the
|
||||
# $__env{} provider works in alerting provisioning YAML. The file
|
||||
# provider ($__file{}) only works in grafana.ini, not in provisioning.
|
||||
"grafana.env" = {
|
||||
content = ''
|
||||
GRAFANA_NTFY_USER=${config.sops.placeholder."jallen-nas/ntfy/user"}
|
||||
GRAFANA_NTFY_PASSWORD=${config.sops.placeholder."jallen-nas/ntfy/password"}
|
||||
'';
|
||||
mode = "0400";
|
||||
owner = "grafana";
|
||||
restartUnits = [ "grafana.service" ];
|
||||
};
|
||||
|
||||
# CrowdSec HTTP notification plugin config with credentials baked in.
|
||||
# The plugin process spawned by crowdsec/cscli reads this file directly.
|
||||
# Credentials are embedded in the URL using HTTP basic auth so no
|
||||
|
||||
Reference in New Issue
Block a user