This commit is contained in:
mjallen18
2026-03-25 22:24:19 -05:00
parent ab81e78b60
commit e119ffaabb
7 changed files with 1637 additions and 517 deletions

View File

@@ -55,7 +55,12 @@
# 3. If the tokenizers-cpp submodule rev changed (check .gitmodules / git
# submodule status), update `tokenizersRev` and `tokenizersHash`:
# nix-prefetch-git --url .../tokenizers-cpp --rev <REV> --fetch-submodules
# 4. Update `cargoVendorHash`: set to lib.fakeHash, run nix build, copy hash.
# 4. If tokenizers changes, regenerate cargoLockContents by running:
# nix shell nixpkgs#cargo --command sh -c \
# 'cp third_party/tokenizers-cpp/rust/Cargo.toml . && \
# cp third_party/tokenizers-cpp/rust/src src -r && \
# cargo generate-lockfile && cat Cargo.lock'
# and replace the cargoLockContents string below.
let
version = "0.9.36";
@@ -76,13 +81,729 @@ let
fetchSubmodules = true;
};
# Vendor the Rust crates from tokenizers-cpp/rust/Cargo.toml offline.
# This fixed-output derivation has network access; everything else is sandboxed.
# To compute the hash: set to lib.fakeHash → nix build → copy printed hash.
cargoVendorDir = rustPlatform.fetchCargoVendor {
src = tokenizers-cpp-src;
sourceRoot = "source/rust";
hash = lib.fakeHash; # FIXME: replace after first successful build attempt
# tokenizers-cpp/rust/Cargo.toml has no Cargo.lock (it's a library crate).
# We embed a generated lock file so rustPlatform.importCargoLock can vendor
# the deps offline without needing network access in the main build sandbox.
#
# Generated with:
# nix shell nixpkgs#cargo --command cargo generate-lockfile
# inside a directory containing Cargo.toml with the same dependencies.
cargoLockContents = ''
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "ahash"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
"getrandom",
"once_cell",
"serde",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "base64"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
[[package]]
name = "bitflags"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
name = "castaway"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
dependencies = [
"rustversion",
]
[[package]]
name = "cc"
version = "1.2.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "compact_str"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [
"castaway",
"cfg-if",
"itoa",
"rustversion",
"ryu",
"serde",
"static_assertions",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "darling"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "dary_heap"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04"
dependencies = [
"serde",
]
[[package]]
name = "derive_builder"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
dependencies = [
"derive_builder_core",
"syn",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "esaxx-rs"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
[[package]]
name = "find-msvc-tools"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasip2",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
[[package]]
name = "libc"
version = "0.2.183"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "macro_rules_attribute"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520"
dependencies = [
"macro_rules_attribute-proc_macro",
"paste",
]
[[package]]
name = "macro_rules_attribute-proc_macro"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "monostate"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67"
dependencies = [
"monostate-impl",
"serde",
"serde_core",
]
[[package]]
name = "monostate-impl"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "once_cell"
version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "onig"
version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0"
dependencies = [
"bitflags",
"libc",
"once_cell",
"onig_sys",
]
[[package]]
name = "onig_sys"
version = "69.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "paste"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "rand"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-cond"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f"
dependencies = [
"either",
"itertools",
"rayon",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "regex"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "ryu"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
"itoa",
"memchr",
"serde",
"serde_core",
"zmij",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "spm_precompiled"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
dependencies = [
"base64",
"nom",
"serde",
"unicode-segmentation",
]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tokenizers"
version = "0.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476"
dependencies = [
"ahash",
"aho-corasick",
"compact_str",
"dary_heap",
"derive_builder",
"esaxx-rs",
"getrandom",
"itertools",
"log",
"macro_rules_attribute",
"monostate",
"onig",
"paste",
"rand",
"rayon",
"rayon-cond",
"regex",
"regex-syntax",
"serde",
"serde_json",
"spm_precompiled",
"thiserror",
"unicode-normalization-alignments",
"unicode-segmentation",
"unicode_categories",
]
[[package]]
name = "tokenizers-c"
version = "0.1.0"
dependencies = [
"ahash",
"serde",
"serde_json",
"tokenizers",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-normalization-alignments"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
dependencies = [
"smallvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da36089a805484bcccfffe0739803392c8298778a2d2f09febf76fac5ad9025b"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wasip2"
version = "1.0.2+wasi-0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wit-bindgen"
version = "0.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
[[package]]
name = "zerocopy"
version = "0.8.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
'';
# Build the cargo vendor directory from the embedded lock file.
# importCargoLock handles projects without a committed Cargo.lock by
# accepting the lock contents directly.
cargoVendorDir = rustPlatform.importCargoLock {
lockFileContents = cargoLockContents;
};
in
@@ -96,7 +817,7 @@ stdenv.mkDerivation rec {
rev = "v${version}";
# We do NOT fetch submodules here — tokenizers-cpp is injected separately
# (above) so that its Rust deps can be vendored in a fixed-output derivation.
hash = "sha256-uq/ZxvJA5HTJbMxofO4Hrz7ULvV1fPC7OHRXulMqwqw=";
hash = "sha256-f/Q5lTplu2umMrTaLN3pgdmkPEMgrDIu7eoMZ3ZogO0=";
};
nativeBuildInputs = [
@@ -142,19 +863,24 @@ stdenv.mkDerivation rec {
];
# CMakeLists.txt lives in src/, not the repo root.
cmakeDir = "src";
# The cmake hook runs from a build/ subdirectory, so ../src resolves to
# $sourceRoot/src where the actual CMakeLists.txt lives.
cmakeDir = "../src";
preConfigure = ''
# 1. Populate the tokenizers-cpp submodule directory
# CMakeLists.txt references the submodule as:
# 1. Populate the tokenizers-cpp submodule directory
# CMakeLists.txt references it as:
# add_subdirectory(''${CMAKE_SOURCE_DIR}/../third_party/tokenizers-cpp ...)
# The cmake setup hook unpacks sources to $TMPDIR/source; we write the
# submodule content there before cmake is invoked.
mkdir -p third_party/tokenizers-cpp
cp -r --no-preserve=mode,ownership "${tokenizers-cpp-src}/." \
third_party/tokenizers-cpp/
# 2. Configure cargo to use the pre-vendored crates (offline)
# 2. Write Cargo.lock (tokenizers-cpp omits it; it's a library crate)
cat > third_party/tokenizers-cpp/rust/Cargo.lock << 'CARGOLOCK'
${cargoLockContents}
CARGOLOCK
# 3. Point cargo at the pre-vendored crates (offline build)
mkdir -p third_party/tokenizers-cpp/rust/.cargo
cat > third_party/tokenizers-cpp/rust/.cargo/config.toml << EOF
[source.crates-io]

View File

@@ -4,109 +4,390 @@
fetchFromGitHub,
cmake,
ninja,
git,
pkg-config,
python3,
python3Packages,
# Boost (filesystem + program_options required by XRT)
boost,
# OpenCL (headers + ICD loader — XRT uses FindOpenCL)
opencl-headers,
ocl-icd,
# Core system libraries
curl,
openssl,
systemd,
elfutils, # libelf + libdw
libdrm,
libuuid,
ncurses,
protobuf,
elfutils,
libyaml,
zlib,
# protobuf for XRT's RPC/metadata serialisation
protobuf, # provides both the library and the protoc binary
# rapidjson (header-only, but XRT finds it via cmake)
rapidjson,
util-linux, # provides libuuid
xz, # provides liblzma
# systemd for libudev (device enumeration)
systemd,
# patchelf for post-install fixup
patchelf,
autoPatchelfHook,
# runCommandLocal to create the systemtap SDT stub header
runCommandLocal,
}:
# AMD XRT (Xilinx Runtime) userspace library for NPU (XDNA 2) devices.
# AMD XRT + XDNA Shim — built together from amd/xdna-driver.
#
# This package builds the XRT base library from the commit pinned as a
# submodule in amd/xdna-driver. It provides:
# $out/lib/libxrt_coreutil.so — core utility library (linked by flm)
# $out/lib/libxrt_core.so — platform-independent core
# $out/include/xrt/ — public C++ headers
# This package produces:
# $out/lib/libxrt_coreutil.so — XRT core utility library
# $out/lib/libxrt_core.so — XRT platform-independent core
# $out/lib/xrt/module/
# libxrt_driver_xdna.so — XDNA NPU shim plugin
# $out/include/xrt/ — Public C++ headers
# $out/include/experimental/
# $out/bin/xrt-smi — System management tool
#
# The xrt source tree lives under the src/ subdirectory of the Xilinx/XRT
# repository (see src/CMakeLists.txt which includes CMake/nativeLnx.cmake).
# Build strategy
# ==============
# amd/xdna-driver is the canonical build system for the XDNA stack.
# It brings in Xilinx/XRT as a submodule and builds it with XRT_NPU=1,
# excluding PCIe/Alveo/edge components. Then it builds the XDNA shim
# (src/shim/) which links against libxrt_core + libxrt_coreutil.
#
# XRT version 2.19.0 — pinned to the commit used by amd/xdna-driver main
# as of 2026-03-25 (xrt @ 481583d).
# XRT itself has several required submodules that must be injected:
# src/runtime_src/core/common/aiebu — AIE binary utilities (NPU required)
# src/runtime_src/xdp — XRT Data Platform (profiling)
# src/runtime_src/core/common/elf — ELFIO header library
# src/runtime_src/core/common/gsl — Microsoft GSL headers
# src/runtime_src/aie-rt — AIE runtime
# aiebu itself has sub-submodules (aie-rt, ELFIO, cxxopts).
#
# Runtime note: this package only provides the userspace library. The
# kernel driver (amdxdna.ko) is a separate concern:
# • Linux >= 6.14 ships it in-tree (boot.kernelPackages.linux_latest).
# • Older kernels can use hardware.amdxdna.enable (once packaged).
# We build with -DSKIP_KMOD=1 so the kernel module (amdxdna.ko) is
# NOT compiled here — it ships in-tree since Linux 6.14.
#
# To update
# =========
# 1. Pin xdnaRev to the desired amd/xdna-driver commit.
# 2. Update xdnaHash: nix-prefetch-git --url .../xdna-driver --rev <REV>
# 3. Confirm the xrt submodule ref:
# curl -s "https://api.github.com/repos/amd/xdna-driver/contents/xrt?ref=<REV>" \
# | python3 -c "import sys,json; print(json.load(sys.stdin)['sha'])"
# 4. For each changed submodule, update the corresponding fetchFromGitHub below
# and re-run nix-prefetch-git to get the new hash.
stdenv.mkDerivation rec {
pname = "xrt";
version = "2.19.0";
let
# amd/xdna-driver — driver + shim build system
xdnaRev = "66fd7aef0fe53b1b712141047326ea767488e2e9";
src = fetchFromGitHub {
# Xilinx/XRT submodule commit pinned by the xdnaRev above
xrtRev = "481583db9a26cb506a37cab7f1881ae7c7de2f32";
# XRT version strings — must match what settings.cmake sets for xrtRev
xrtVersionMajor = "2";
xrtVersionMinor = "23";
xrtVersionPatch = "0";
xrtVersion = "${xrtVersionMajor}.${xrtVersionMinor}.${xrtVersionPatch}";
# ── SystemTap SDT stub ────────────────────────────────────────────────────
# XRT unconditionally includes <sys/sdt.h> for DTrace/SystemTap tracing
# probes. Nixpkgs does not package systemtap-sdt-dev as a standalone
# header package. We provide a minimal stub that satisfies the include
# without pulling in any runtime tracing infrastructure.
systemtap-sdt-stub = runCommandLocal "systemtap-sdt-stub" { } ''
mkdir -p "$out/include/sys"
cat > "$out/include/sys/sdt.h" << 'EOF'
/* Minimal SystemTap SDT stub for building XRT on NixOS.
* All probe macros expand to nothing; tracing is disabled at compile time. */
#pragma once
#ifndef _SYS_SDT_H
#define _SYS_SDT_H
#define STAP_PROBEV(provider, name, ...) do {} while (0)
#define DTRACE_PROBE(provider, name) do {} while (0)
#define DTRACE_PROBE1(provider, name, a1) do {} while (0)
#define DTRACE_PROBE2(p, n, a1, a2) do {} while (0)
#define DTRACE_PROBE3(p, n, a1, a2, a3) do {} while (0)
#define DTRACE_PROBE4(p, n, a1, a2, a3, a4) do {} while (0)
#define DTRACE_PROBE5(p, n, a1, a2, a3, a4, a5) do {} while (0)
#endif /* _SYS_SDT_H */
EOF
'';
# ── XRT source ────────────────────────────────────────────────────────────
xrt-src = fetchFromGitHub {
owner = "Xilinx";
repo = "XRT";
rev = "481583db9a26cb506a37cab7f1881ae7c7de2f32";
rev = xrtRev;
hash = "sha256-WLZDjuuEGd3i77zXpAJkfQy/AszdSQ9pagy64yGX58Q=";
fetchSubmodules = false; # XRT submodules are Windows-only tools
fetchSubmodules = false; # We inject all submodules manually below
};
# ── XRT submodules (fetched individually, injected in preConfigure) ───────
# AIE binary utilities — required for NPU support
xrt-aiebu = fetchFromGitHub {
owner = "Xilinx";
repo = "aiebu";
rev = "fb62863a0f3d8eaf79b88231603f9d464d0afc24";
hash = "sha256-iKnT/U2T1Q9WGMpTLtJGZZg2OLalhXaHOo9ZdEGznoM=";
fetchSubmodules = false; # aiebu sub-submodules injected separately
};
# XRT Data Platform (performance profiling infrastructure)
xrt-xdp = fetchFromGitHub {
owner = "Xilinx";
repo = "XDP";
rev = "a868510682074c5a52ca100bb33dbf2e39ea9e9b";
hash = "sha256-DTIPK+/ShX7xZerMBMU5rxIXjwVx2yViGwNa9qhN5TM=";
};
# ELFIO — C++ ELF reader/writer (used by XRT core)
xrt-elfio = fetchFromGitHub {
owner = "serge1";
repo = "ELFIO";
rev = "f849001fc229c2598f8557e0df22866af194ef98";
hash = "sha256-/H+ajhOx6q4cHatJUMhJP2DPrWNtm+qlfNfDXbBjOEw=";
};
# Microsoft GSL — Guidelines Support Library headers
xrt-gsl = fetchFromGitHub {
owner = "microsoft";
repo = "GSL";
rev = "a3534567187d2edc428efd3f13466ff75fe5805c";
hash = "sha256-cXDFqt2KgMFGfdh6NGE+JmP4R0Wm9LNHM0eIblYe6zU=";
};
# AIE runtime — pinned by XRT directly
xrt-aie-rt = fetchFromGitHub {
owner = "Xilinx";
repo = "aie-rt";
rev = "a8b0667133ea2851ce27793a1796c5968226d9af";
hash = "sha256-VZxFpc60O5LXuVJ5AajDUtGrfXU+NIlyY+M87EHkIac=";
};
# ── aiebu sub-submodules ──────────────────────────────────────────────────
# AIE runtime version pinned by aiebu (may differ from xrt-aie-rt above)
aiebu-aie-rt = fetchFromGitHub {
owner = "Xilinx";
repo = "aie-rt";
rev = "8849e208bdcc533b20a0ed3f95c1ce961dee9c3a";
hash = "sha256-CiczNOcjsRd+163gRqGpjNW9ap/6ntHNV8ImkLssWts=";
};
# ELFIO version pinned by aiebu
aiebu-elfio = fetchFromGitHub {
owner = "serge1";
repo = "ELFIO";
rev = "182248f364e6375eaad30cefdd6b67660abaa3b3";
hash = "sha256-wg4Sed3xKJIkc0F3qOQzLdo0rL/so1fpa6nS9rpzB/Q=";
};
# cxxopts — command-line option parser used by aiebu tools
aiebu-cxxopts = fetchFromGitHub {
owner = "jarro2783";
repo = "cxxopts";
rev = "10a7a647791fa3a24ec4f572f2573a6e0aaa881b";
hash = "sha256-QrqNe2XICY+Ej0A01XIoigIpv/YOh07cV/PGeYyIgMA=";
};
in
stdenv.mkDerivation rec {
pname = "xrt";
version = xrtVersion;
src = fetchFromGitHub {
owner = "amd";
repo = "xdna-driver";
rev = xdnaRev;
hash = "sha256-oOp1MiscLez6yxT59Aw/ZeO9JXuB8pz1fm6b64IgM3E=";
fetchSubmodules = false; # We inject xrt manually below
};
nativeBuildInputs = [
cmake
ninja
git
pkg-config
python3
python3Packages.pybind11
autoPatchelfHook
patchelf
protobuf # provides protoc binary
];
buildInputs = [
boost
opencl-headers
ocl-icd
curl
openssl
systemd # for libudev (device enumeration)
elfutils
libdrm
libuuid
ncurses
protobuf
elfutils # libelf
libyaml
zlib
protobuf
rapidjson
util-linux # libuuid
xz # liblzma
systemd # libudev
systemtap-sdt-stub # sys/sdt.h — stub header for XRT tracing probes
];
# XRT's CMakeLists.txt is in the src/ subdirectory.
cmakeDir = "src";
preConfigure = ''
# 1. Inject the XRT submodule
cp -r --no-preserve=mode,ownership "${xrt-src}/." xrt/
# Make xrt/ look like a git repo so find_package(Git) and any git
# invocations inside the build don't escape the sandbox.
git init xrt
git -C xrt config user.email "nix@build"
git -C xrt config user.name "Nix Build"
git -C xrt add -A
git -C xrt commit -m "xrt source" --allow-empty
# 2. Inject XRT's required submodules
# These are all needed for the NPU (XRT_NPU=1) build path.
cp -r --no-preserve=mode,ownership "${xrt-aiebu}/." \
xrt/src/runtime_src/core/common/aiebu/
cp -r --no-preserve=mode,ownership "${xrt-xdp}/." \
xrt/src/runtime_src/xdp/
cp -r --no-preserve=mode,ownership "${xrt-elfio}/." \
xrt/src/runtime_src/core/common/elf/
cp -r --no-preserve=mode,ownership "${xrt-gsl}/." \
xrt/src/runtime_src/core/common/gsl/
cp -r --no-preserve=mode,ownership "${xrt-aie-rt}/." \
xrt/src/runtime_src/aie-rt/
# 3. Inject aiebu's own sub-submodules
cp -r --no-preserve=mode,ownership "${aiebu-aie-rt}/." \
xrt/src/runtime_src/core/common/aiebu/lib/aie-rt/
cp -r --no-preserve=mode,ownership "${aiebu-elfio}/." \
xrt/src/runtime_src/core/common/aiebu/src/cpp/ELFIO/
cp -r --no-preserve=mode,ownership "${aiebu-cxxopts}/." \
xrt/src/runtime_src/core/common/aiebu/src/cpp/cxxopts/
# 4. Patch hardcoded /bins path
# CMakeLists.txt line 30 uses plain set(XDNA_BIN_DIR /bins) which shadows
# any -D cache variable we pass. Rewrite it to use CMAKE_INSTALL_PREFIX
# so the secondary "quick testing" install lands inside $out.
substituteInPlace CMakeLists.txt \
--replace-fail \
'set(XDNA_BIN_DIR /bins)' \
'set(XDNA_BIN_DIR ''${CMAKE_INSTALL_PREFIX})'
# 5. Patch pkg.cmake to handle NixOS
# pkg.cmake reads /etc/os-release at configure-time and issues a
# FATAL_ERROR for unrecognised distros (which includes NixOS).
# CPack is never invoked in a Nix build — only cmake --install runs.
python3 << 'PYEOF'
# Rewrite the unsupported-distro branch of CMake/pkg.cmake.
#
# The branch matched below is an else() whose body starts with
# message(FATAL_ERROR ...); it is replaced, up to the next endif(), by a
# branch that selects the DEB CPack generator instead of aborting.
#
# The script exits non-zero when the pattern matches nothing, so a changed
# upstream pkg.cmake is reported here instead of being silently left
# unpatched while still printing a success message.
import re, pathlib

pkg_cmake = pathlib.Path('CMake/pkg.cmake')
original = pkg_cmake.read_text()

# DOTALL lets the non-greedy .*? run across line breaks up to the first
# endif() that follows the FATAL_ERROR message.
patched, hits = re.subn(
    r'else\(\)\s*message\(FATAL_ERROR.*?endif\(\)',
    'else()\n'
    ' message(STATUS "Unrecognised distro, falling back to DEB CPack generator (NixOS build).")\n'
    ' set(CPACK_GENERATOR "DEB")\n'
    ' set(CPACK_DEB_COMPONENT_INSTALL ON)\n'
    ' set(CPACK_DEBIAN_PACKAGE_DEPENDS "")\n'
    'endif()',
    original,
    flags=re.DOTALL,
)

if hits == 0:
    # Nothing was replaced: stop now with a message that names the cause.
    raise SystemExit(
        'CMake/pkg.cmake: FATAL_ERROR fallback branch not found; '
        'the patch pattern needs updating'
    )

pkg_cmake.write_text(patched)
print('CMake/pkg.cmake patched')
PYEOF
'';
cmakeFlags = [
"-DCMAKE_BUILD_TYPE=Release"
"-DCMAKE_INSTALL_PREFIX=${placeholder "out"}"
# Build the NPU/XDNA variant (skips PCIe FPGA-specific components).
"-DXRT_NATIVE_BUILD=yes"
# Disable components we do not need:
"-DXRT_ENABLE_WERROR=OFF"
# Install libraries to lib/ (some builds default to lib64/).
"-DCMAKE_INSTALL_LIBDIR=lib"
"-DSKIP_KMOD=1"
"-DBUILD_VXDNA=OFF"
"-DXRT_UPSTREAM_DEBIAN=1"
"-DXRT_VERSION_MAJOR=${xrtVersionMajor}"
"-DXRT_VERSION_MINOR=${xrtVersionMinor}"
"-DXRT_VERSION_PATCH=${xrtVersionPatch}"
"-DXRT_BUILD_NUMBER=${xrtVersionPatch}"
"-DXRT_ENABLE_WERROR=OFF"
"-DOpenCL_INCLUDE_DIR=${opencl-headers}/include"
"-DOpenCL_LIBRARY=${ocl-icd}/lib/libOpenCL.so"
];
# XRT's install target places a setup.sh in the prefix root; we don't need
# that for Nix — the binary wrapper / RPATH mechanism handles library lookup.
postInstall = ''
# Remove the CMake-generated setup.sh not needed in a Nix env.
installPhase = ''
runHook preInstall
# Install the xdna shim + XRT libs (the normal install target).
cmake --install . --prefix "$out"
# XRT is built EXCLUDE_FROM_ALL so its header install rules are not in
# the default install. The component-based install (--component xrt_base_dev)
# embeds the absolute build-dir path in the destination and produces the
# wrong layout (headers go to $out/nix/store/$out/include/ instead of
# $out/include/).
#
# Instead, copy the headers directly from the XRT source tree — the public
# API surface is fully captured in src/runtime_src/core/include/.
mkdir -p "$out/include"
cp -r --no-preserve=mode,ownership \
xrt/src/runtime_src/core/include/. \
"$out/include/"
# Also install the generated version headers from the build directory.
if [ -d "xrt/src/gen" ]; then
cp -r --no-preserve=mode,ownership xrt/src/gen/. "$out/include/"
fi
runHook postInstall
'';
postFixup = ''
rpath="${lib.makeLibraryPath buildInputs}:$out/lib"
for so in "$out/lib/xrt/module"/*.so; do
[ -f "$so" ] && patchelf --set-rpath "$rpath" "$so" 2>/dev/null || true
done
for so in "$out/lib"/*.so "$out/lib"/*.so.*; do
[ -f "$so" ] && patchelf --set-rpath "$rpath" "$so" 2>/dev/null || true
done
for bin in "$out/bin"/*; do
[ -f "$bin" ] && patchelf --set-rpath "$rpath" "$bin" 2>/dev/null || true
done
rm -f "$out"/setup.sh "$out"/setup.csh 2>/dev/null || true
'';
meta = with lib; {
description = "AMD XRT (Xilinx Runtime) userspace library for XDNA NPUs";
description = "AMD XRT runtime and XDNA NPU shim for Ryzen AI NPUs";
longDescription = ''
XRT is the userspace component of AMD's XRT stack for their FPGA and
NPU devices. This package builds only the base library
(libxrt_coreutil, libxrt_core) that FastFlowLM links against to
communicate with the AMD XDNA 2 NPU via the amdxdna kernel driver.
AMD XRT (Xilinx Runtime) is the userspace runtime for AMD XDNA 2 NPU
devices (Ryzen AI Strix Point, Strix Halo, Kraken Point, Gorgon Point).
The kernel driver (amdxdna.ko) is built in since Linux 6.14.
For older kernels it can be loaded via a DKMS package.
This package builds the XRT base libraries (libxrt_coreutil, libxrt_core)
together with the XDNA shim plugin (libxrt_driver_xdna) from the
amd/xdna-driver repository.
The kernel driver (amdxdna.ko) ships in-tree since Linux 6.14 and is
available for older kernels via a DKMS package. NPU firmware is
provided by the linux-firmware package (version >= 20260221 required).
Usage:
xrt-smi validate validate the NPU setup end-to-end
xrt-smi examine query the NPU device
'';
homepage = "https://github.com/Xilinx/XRT";
homepage = "https://github.com/amd/xdna-driver";
license = licenses.asl20;
platforms = [ "x86_64-linux" ];
maintainers = [ ];