{
  config,
  lib,
  pkgs,
  inputs,
  system,
  namespace,
  ...
}:
with lib;
let
  inherit (lib.${namespace}) mkOpt;

  cfg = config.${namespace}.services.ai;

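  # Posts a high-priority ntfy notification when the nightly model update
  # fails; NTFY_USER/NTFY_PASSWORD come from the sops-templated ntfy.env
  # loaded via EnvironmentFile further below.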
  ntfyModelFailScript = pkgs.writeShellScript "update-gemma-model-notify-failure" ''
    HOST="$(${pkgs.hostname}/bin/hostname)"
    ${pkgs.curl}/bin/curl -sf \
      --user "$NTFY_USER:$NTFY_PASSWORD" \
      -H "Title: Gemma model update FAILED on $HOST" \
      -H "Priority: high" \
      -H "Tags: rotating_light,robot_face" \
      -d "The daily update-gemma-model job failed. Check: journalctl -u update-gemma-model.service" \
      "https://ntfy.mjallen.dev/builds" || true
  '';

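  # mkModule is this flake's own wrapper (from lib.${namespace}) that turns the
  # declared options and moduleConfig into a regular NixOS module; the
  # openFirewall and configDir values inherited below are assumed to be
  # provided by that shared wrapper.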
  aiConfig = lib.${namespace}.mkModule {
    inherit config;
    name = "ai";
    description = "AI Services";
    options = {
      llama-cpp = {
        model = mkOpt types.str "gemma-4-26B-A4B-it-UD-Q8_K_XL" "Basename of the GGUF model served by llama-cpp (without the .gguf extension)";
      };
    };
    moduleConfig = {
      services = {
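        # Ollama with the ROCm backend; rocmOverrideGfx pins the reported GFX
        # version so the ROCm kernels match the installed GPU. State lives
        # under the shared configDir.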
        ollama = {
          inherit (cfg) openFirewall;
          enable = true;
          package = pkgs.ollama-rocm;
          port = 11434;
          host = "0.0.0.0";
          user = "nix-apps";
          group = "jallen-nas";
          rocmOverrideGfx = "11.0.2";
          loadModels = [ ];
          home = "${cfg.configDir}/ollama";
        };

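        # llama.cpp server using the ROCm build from the llama-cpp flake input.
        # --jinja plus the chat-template kwargs enable the template's thinking
        # mode; the sampling flags (temp 1.0, top-p 0.95, top-k 64) appear to
        # follow the model's recommended settings.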
        llama-cpp = {
          inherit (cfg) openFirewall;
          enable = true;
          port = 8127;
          host = "0.0.0.0";
          model = "${cfg.configDir}/llama-cpp/models/${cfg.llama-cpp.model}.gguf";
          package = inputs.llama-cpp.packages.${system}.rocm;
          extraFlags = [
            "--jinja"
            "--chat-template-kwargs"
            "{\"enable_thinking\":true}"
            "--temp"
            "1.0"
            "--top-p"
            "0.95"
            "--top-k"
            "64"
            "--ctx-size"
            "32768"
            "--threads"
            "8"
            "--batch-size"
            "512"
            "--gpu-layers"
            "999"
            "--flash-attn"
            "on"
            "--mlock"
          ];
        };

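        # Open WebUI fronted by Authentik OIDC; local signup and the built-in
        # login form are disabled and telemetry is switched off. The sops
        # environmentFile presumably carries the OAuth client credentials.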
        open-webui = {
          inherit (cfg) openFirewall;
          enable = true;
          package = pkgs.open-webui;
          host = "0.0.0.0";
          port = 8888;
          environmentFile = config.sops.secrets."jallen-nas/open-webui".path;
          environment = {
            OPENID_PROVIDER_URL = "https://authentik.mjallen.dev/application/o/chat/.well-known/openid-configuration";
            OAUTH_PROVIDER_NAME = "authentik";
            OPENID_REDIRECT_URI = "https://chat.mjallen.dev/oauth/oidc/callback";
            ENABLE_OAUTH_SIGNUP = "False";
            OAUTH_MERGE_ACCOUNTS_BY_EMAIL = "True";
            ENABLE_SIGNUP = "False";
            ENABLE_LOGIN_FORM = "False";
            ANONYMIZED_TELEMETRY = "False";
            DO_NOT_TRACK = "True";
            SCARF_NO_ANALYTICS = "True";
            OLLAMA_API_BASE_URL = "http://127.0.0.1:11434";
            LOCAL_FILES_ONLY = "False";
            WEBUI_AUTH = "False";
          };
        };
      };

      # GPU monitoring tool and the HuggingFace Hub CLI used by the model update job below
      environment.systemPackages = with pkgs; [
        amdgpu_top
        python3Packages.huggingface-hub
      ];

      # Systemd service for automatic model updates
      systemd = {
        services = {
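          # Oneshot job; `startAt` below also generates a matching systemd
          # timer, and OnFailure chains the ntfy alert unit when it fails.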
          update-gemma-model = {
            description = "Update Gemma 4 model from HuggingFace";
            serviceConfig = {
              Type = "oneshot";
              ExecStart = "${pkgs.writeShellScript "update-gemma-model" ''
                set -euo pipefail

                MODEL_DIR="${cfg.configDir}/llama-cpp/models"
                MODEL_NAME="${cfg.llama-cpp.model}.gguf"
                REPO_ID="unsloth/gemma-4-26B-A4B-it-GGUF"

                # Create model directory if it doesn't exist
                mkdir -p "$MODEL_DIR"

                # Download the latest version of the model
                echo "Updating $MODEL_NAME from HuggingFace..."
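                # `hf download` fetches only the named file into --local-dir;
                # recent huggingface-hub releases should skip the transfer when
                # the local copy is already current (assumes the nixpkgs package
                # ships the `hf` entry point used here).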
                ${pkgs.python3Packages.huggingface-hub}/bin/hf download \
                  "$REPO_ID" \
                  "$MODEL_NAME" \
                  --local-dir "$MODEL_DIR"

                echo "Model updated successfully"
              ''}";
              User = "nix-apps";
              Group = "jallen-nas";
              EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
            };
            unitConfig.OnFailure = "update-gemma-model-notify-failure.service";
            # Run daily at 3 AM
            startAt = "*-*-* 03:00:00";
          };

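          # Started via OnFailure from update-gemma-model; runs the ntfy alert
          # script defined above.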
          update-gemma-model-notify-failure = {
            description = "Notify ntfy on update-gemma-model failure";
            serviceConfig = {
              Type = "oneshot";
              ExecStart = "${ntfyModelFailScript}";
              EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
            };
          };

          # Ensure model is available before llama-cpp starts
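          # `wants` (rather than `requires`) lets llama-cpp start even if the
          # update job fails, as long as a previously downloaded model exists.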
          llama-cpp = {
            after = [ "update-gemma-model.service" ];
            wants = [ "update-gemma-model.service" ];
          };
        };
      };
    };
  };
in
{
  imports = [ aiConfig ];
}