Files
nix-config/modules/nixos/services/ai/default.nix
2026-04-05 19:10:23 -05:00

177 lines
5.2 KiB
Nix
Executable File

# NixOS module for the local AI stack (ollama, llama-cpp, open-webui) plus a
# scheduled model-update job.
#
# Arguments used below: `config`, `lib`, `pkgs`, `inputs` (for the llama-cpp
# package), `system` (selects the package set from that input) and `namespace`
# (key under which this repository's own lib helpers and options live).
{
  config,
  lib,
  pkgs,
  inputs,
  system,
  namespace,
  ...
}:
with lib;
let
  # Option-declaration helper from this repository's lib; called below as
  # `mkOpt <type> <default> <description>`.
  mkOpt = lib.${namespace}.mkOpt;

  # This module's own option values.
  cfg = config.${namespace}.services.ai;
# Shell script that posts a high-priority "model update failed" message to the
# ntfy `builds` topic, tagged with this machine's hostname.
#
# NTFY_USER and NTFY_PASSWORD must be present in the script's environment.
# The trailing `|| true` makes the script exit 0 even when curl fails, so a
# failed notification never marks the notifying unit itself as failed.
ntfyModelFailScript =
  let
    hostnameBin = "${pkgs.hostname}/bin/hostname";
    curlBin = "${pkgs.curl}/bin/curl";
  in
  pkgs.writeShellScript "update-qwen-model-notify-failure" ''
    HOST="$(${hostnameBin})"
    ${curlBin} -sf \
    --user "$NTFY_USER:$NTFY_PASSWORD" \
    -H "Title: Qwen model update FAILED on $HOST" \
    -H "Priority: high" \
    -H "Tags: rotating_light,robot_face" \
    -d "The daily update-qwen-model job failed. Check: journalctl -u update-qwen-model.service" \
    "https://ntfy.mjallen.dev/builds" || true
  '';
# Module for the AI stack, built with this repository's `mkModule` helper.
#
# Declares one extra option (`llama-cpp.model`) and, in `moduleConfig`,
# configures:
#   * ollama, llama-cpp and open-webui services,
#   * a daily `update-qwen-model` oneshot that downloads the model file with
#     the HuggingFace CLI, and
#   * `update-qwen-model-notify-failure`, triggered via OnFailure to send an
#     ntfy message.
#
# `cfg.openFirewall` and `cfg.configDir` are read here but not declared in this
# file; presumably `mkModule` provides them — verify against the helper.
aiConfig =
  let
    # Both the download job and the failure notifier need outbound network.
    # llama-cpp.service pulls the download job in at start-up (see the bottom
    # of this block), so without this ordering the job can run during boot
    # before the network is up, fail, and trigger a notification that cannot
    # be delivered either.
    networkOnline = [ "network-online.target" ];

    # Downloads `<model>.gguf` from the HuggingFace repo into the llama-cpp
    # models directory. `set -euo pipefail` makes any failing step fail the
    # unit, which in turn triggers the OnFailure notifier.
    #
    # NOTE(review): REPO_ID points at the Qwen3-Coder-Next repo, while the
    # default `llama-cpp.model` is a gemma file addressed by a HuggingFace
    # cache-layout path (models--…/snapshots/…). With that default the download
    # looks like it cannot succeed — confirm the intended repo/file pairing.
    updateModelScript = pkgs.writeShellScript "update-qwen-model" ''
      set -euo pipefail
      MODEL_DIR="${cfg.configDir}/llama-cpp/models"
      MODEL_NAME="${cfg.llama-cpp.model}.gguf"
      REPO_ID="unsloth/Qwen3-Coder-Next-GGUF"
      # Create model directory if it doesn't exist
      mkdir -p "$MODEL_DIR"
      # Download the latest version of the model
      echo "Updating $MODEL_NAME from HuggingFace..."
      ${pkgs.python3Packages.huggingface-hub}/bin/hf download \
      "$REPO_ID" \
      "$MODEL_NAME" \
      --local-dir "$MODEL_DIR"
      echo "Model updated successfully"
    '';
  in
  lib.${namespace}.mkModule {
    inherit config;
    name = "ai";
    description = "AI Services";

    options = {
      llama-cpp = {
        # Model file path relative to `<configDir>/llama-cpp/models`, without
        # the `.gguf` extension (the extension is appended where it is used).
        model =
          mkOpt types.str
            "models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/b8654b48d979f2853b7a81d6541ca64eea7dc3c5/gemma-4-26B-A4B-it-UD-Q8_K_XL"
            "";
      };
    };

    moduleConfig = {
      services = {
        # Ollama (ROCm build), listening on all interfaces; state is kept under
        # the module's config directory.
        ollama = {
          inherit (cfg) openFirewall;
          enable = true;
          package = pkgs.ollama-rocm;
          port = 11434;
          host = "0.0.0.0";
          user = "nix-apps";
          group = "jallen-nas";
          rocmOverrideGfx = "11.0.2";
          loadModels = [ ];
          home = "${cfg.configDir}/ollama";
        };

        # llama.cpp server (ROCm build taken from the `llama-cpp` flake input)
        # serving the model selected by `llama-cpp.model`.
        llama-cpp = {
          inherit (cfg) openFirewall;
          enable = true;
          port = 8127;
          host = "0.0.0.0";
          model = "${cfg.configDir}/llama-cpp/models/${cfg.llama-cpp.model}.gguf";
          package = inputs.llama-cpp.packages.${system}.rocm;
          # Passed to the server verbatim; each option name is followed by its
          # value as a separate list element.
          extraFlags = [
            "--fit"
            "on"
            "--seed"
            "3407"
            "--temp"
            "0.7"
            "--top-p"
            "0.9"
            "--min-p"
            "0.05"
            "--top-k"
            "30"
            "--jinja"
            "--ctx-size"
            "131072"
            "--threads"
            "8"
            "--batch-size"
            "512"
            "--gpu-layers"
            "999"
            "--flash-attn"
            "auto"
            "--mlock"
          ];
        };

        # Open WebUI front end; secrets come from the sops-managed
        # environment file.
        open-webui = {
          inherit (cfg) openFirewall;
          enable = true;
          package = pkgs.open-webui;
          host = "0.0.0.0";
          port = 8888;
          environmentFile = config.sops.secrets."jallen-nas/open-webui".path;
          environment = {
            OPENID_PROVIDER_URL = "https://authentik.mjallen.dev/application/o/chat/.well-known/openid-configuration";
            OAUTH_PROVIDER_NAME = "authentik";
            OPENID_REDIRECT_URI = "https://chat.mjallen.dev/oauth/oidc/callback";
            ENABLE_OAUTH_SIGNUP = "False";
            OAUTH_MERGE_ACCOUNTS_BY_EMAIL = "True";
            ENABLE_SIGNUP = "False";
            ENABLE_LOGIN_FORM = "False";
            ANONYMIZED_TELEMETRY = "False";
            DO_NOT_TRACK = "True";
            SCARF_NO_ANALYTICS = "True";
            OLLAMA_API_BASE_URL = "http://127.0.0.1:11434";
            LOCAL_FILES_ONLY = "False";
            # NOTE(review): Open WebUI documents WEBUI_AUTH=False as turning
            # authentication off altogether. Together with the OAuth settings
            # above and a 0.0.0.0 bind this looks unintended — confirm.
            WEBUI_AUTH = "False";
          };
        };
      };

      # System-wide tools: amdgpu_top and the HuggingFace Hub Python package.
      environment.systemPackages = with pkgs; [
        amdgpu_top
        python3Packages.huggingface-hub
      ];

      systemd = {
        services = {
          # Scheduled model download.
          update-qwen-model = {
            description = "Update Qwen3-Coder-Next model from HuggingFace";
            # The download needs the network; wait until it is reported online.
            after = networkOnline;
            wants = networkOnline;
            serviceConfig = {
              Type = "oneshot";
              ExecStart = "${updateModelScript}";
              User = "nix-apps";
              Group = "jallen-nas";
              # NOTE(review): the download script itself reads no NTFY_*
              # variable; check whether this unit needs the ntfy credentials.
              EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
            };
            unitConfig.OnFailure = "update-qwen-model-notify-failure.service";
            # Run daily at 3 AM
            startAt = "*-*-* 03:00:00";
          };

          # Started by systemd when update-qwen-model fails (OnFailure above).
          update-qwen-model-notify-failure = {
            description = "Notify ntfy on update-qwen-model failure";
            # The notification is an HTTPS request, so it needs the network too.
            after = networkOnline;
            wants = networkOnline;
            serviceConfig = {
              Type = "oneshot";
              ExecStart = "${ntfyModelFailScript}";
              # Supplies NTFY_USER / NTFY_PASSWORD to the script.
              EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
            };
          };

          # Order llama-cpp after the download job and pull that job in when
          # llama-cpp starts. `wants` (not `requires`) means llama-cpp still
          # starts if the download fails.
          llama-cpp = {
            after = [ "update-qwen-model.service" ];
            wants = [ "update-qwen-model.service" ];
          };
        };
      };
    };
  };
in
{
# The module's entire output: import the module value bound to `aiConfig`.
imports = [ aiConfig ];
}