cyd
This commit is contained in:
@@ -45,7 +45,7 @@ let
|
||||
discoveryURL = "https://authentik.mjallen.dev/application/o/actual/.well-known/openid-configuration";
|
||||
client_id._secret = config.sops.secrets."jallen-nas/actual/client-id".path;
|
||||
client_secret._secret = config.sops.secrets."jallen-nas/actual/client-secret".path;
|
||||
server_hostname = "https://authentik.mjallen.dev";
|
||||
server_hostname = "https://actual.mjallen.dev";
|
||||
authMethod = "openid";
|
||||
};
|
||||
};
|
||||
|
||||
@@ -13,14 +13,14 @@ let
|
||||
|
||||
cfg = config.${namespace}.services.ai;
|
||||
|
||||
ntfyModelFailScript = pkgs.writeShellScript "update-qwen-model-notify-failure" ''
|
||||
ntfyModelFailScript = pkgs.writeShellScript "update-gemma-model-notify-failure" ''
|
||||
HOST="$(${pkgs.hostname}/bin/hostname)"
|
||||
${pkgs.curl}/bin/curl -sf \
|
||||
--user "$NTFY_USER:$NTFY_PASSWORD" \
|
||||
-H "Title: Qwen model update FAILED on $HOST" \
|
||||
-H "Title: Gemma model update FAILED on $HOST" \
|
||||
-H "Priority: high" \
|
||||
-H "Tags: rotating_light,robot_face" \
|
||||
-d "The daily update-qwen-model job failed. Check: journalctl -u update-qwen-model.service" \
|
||||
-d "The daily update-gemma-model job failed. Check: journalctl -u update-gemma-model.service" \
|
||||
"https://ntfy.mjallen.dev/builds" || true
|
||||
'';
|
||||
|
||||
@@ -30,10 +30,7 @@ let
|
||||
description = "AI Services";
|
||||
options = {
|
||||
llama-cpp = {
|
||||
model =
|
||||
mkOpt types.str
|
||||
"models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/b8654b48d979f2853b7a81d6541ca64eea7dc3c5/gemma-4-26B-A4B-it-UD-Q8_K_XL"
|
||||
"";
|
||||
model = mkOpt types.str "gemma-4-26B-A4B-it-UD-Q8_K_XL" "";
|
||||
};
|
||||
};
|
||||
moduleConfig = {
|
||||
@@ -59,21 +56,17 @@ let
|
||||
model = "${cfg.configDir}/llama-cpp/models/${cfg.llama-cpp.model}.gguf";
|
||||
package = inputs.llama-cpp.packages.${system}.rocm;
|
||||
extraFlags = [
|
||||
"--fit"
|
||||
"on"
|
||||
"--seed"
|
||||
"3407"
|
||||
"--temp"
|
||||
"0.7"
|
||||
"--top-p"
|
||||
"0.9"
|
||||
"--min-p"
|
||||
"0.05"
|
||||
"--top-k"
|
||||
"30"
|
||||
"--jinja"
|
||||
"--chat-template-kwargs"
|
||||
"{\"enable_thinking\":true}"
|
||||
"--temp"
|
||||
"1.0"
|
||||
"--top-p"
|
||||
"0.95"
|
||||
"--top-k"
|
||||
"64"
|
||||
"--ctx-size"
|
||||
"131072"
|
||||
"32768"
|
||||
"--threads"
|
||||
"8"
|
||||
"--batch-size"
|
||||
@@ -81,7 +74,7 @@ let
|
||||
"--gpu-layers"
|
||||
"999"
|
||||
"--flash-attn"
|
||||
"auto"
|
||||
"on"
|
||||
"--mlock"
|
||||
];
|
||||
};
|
||||
@@ -120,16 +113,16 @@ let
|
||||
# Systemd service for automatic model updates
|
||||
systemd = {
|
||||
services = {
|
||||
update-qwen-model = {
|
||||
description = "Update Qwen3-Coder-Next model from HuggingFace";
|
||||
update-gemma-model = {
|
||||
description = "Update Gemma 4 model from HuggingFace";
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = "${pkgs.writeShellScript "update-qwen-model" ''
|
||||
ExecStart = "${pkgs.writeShellScript "update-gemma-model" ''
|
||||
set -euo pipefail
|
||||
|
||||
MODEL_DIR="${cfg.configDir}/llama-cpp/models"
|
||||
MODEL_NAME="${cfg.llama-cpp.model}.gguf"
|
||||
REPO_ID="unsloth/Qwen3-Coder-Next-GGUF"
|
||||
REPO_ID="unsloth/gemma-4-26B-A4B-it-GGUF"
|
||||
|
||||
# Create model directory if it doesn't exist
|
||||
mkdir -p "$MODEL_DIR"
|
||||
@@ -147,13 +140,13 @@ let
|
||||
Group = "jallen-nas";
|
||||
EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
|
||||
};
|
||||
unitConfig.OnFailure = "update-qwen-model-notify-failure.service";
|
||||
unitConfig.OnFailure = "update-gemma-model-notify-failure.service";
|
||||
# Run daily at 3 AM
|
||||
startAt = "*-*-* 03:00:00";
|
||||
};
|
||||
|
||||
update-qwen-model-notify-failure = {
|
||||
description = "Notify ntfy on update-qwen-model failure";
|
||||
update-gemma-model-notify-failure = {
|
||||
description = "Notify ntfy on update-gemma-model failure";
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = "${ntfyModelFailScript}";
|
||||
@@ -163,8 +156,8 @@ let
|
||||
|
||||
# Ensure model is available before llama-cpp starts
|
||||
llama-cpp = {
|
||||
after = [ "update-qwen-model.service" ];
|
||||
wants = [ "update-qwen-model.service" ];
|
||||
after = [ "update-gemma-model.service" ];
|
||||
wants = [ "update-gemma-model.service" ];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user