ha
This commit is contained in:
@@ -7,12 +7,11 @@
|
||||
}:
|
||||
with lib;
|
||||
let
|
||||
name = "ai";
|
||||
cfg = config.${namespace}.services.${name};
|
||||
cfg = config.${namespace}.services.ai;
|
||||
|
||||
aiConfig = lib.${namespace}.mkModule {
|
||||
inherit config name;
|
||||
serviceName = "open-webui"; # todo multiple?
|
||||
inherit config;
|
||||
name = "ai";
|
||||
description = "AI Services";
|
||||
options = { };
|
||||
moduleConfig = {
|
||||
@@ -43,14 +42,25 @@ let
|
||||
"--seed"
|
||||
"3407"
|
||||
"--temp"
|
||||
"1.0"
|
||||
"0.7"
|
||||
"--top-p"
|
||||
"0.95"
|
||||
"0.9"
|
||||
"--min-p"
|
||||
"0.01"
|
||||
"0.05"
|
||||
"--top-k"
|
||||
"40"
|
||||
"30"
|
||||
"--jinja"
|
||||
"--ctx-size"
|
||||
"4096"
|
||||
"--threads"
|
||||
"8"
|
||||
"--batch-size"
|
||||
"512"
|
||||
"--gpu-layers"
|
||||
"999"
|
||||
"--flash-attn"
|
||||
"auto"
|
||||
"--mlock"
|
||||
];
|
||||
};
|
||||
|
||||
@@ -79,16 +89,52 @@ let
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Model update script using HuggingFace Hub
|
||||
environment.systemPackages = with pkgs; [
|
||||
amdgpu_top
|
||||
python3Packages.huggingface-hub
|
||||
];
|
||||
|
||||
# Oneshot unit that refreshes the Qwen3-Coder-Next GGUF file from HuggingFace.
# It is started daily by the timer generated from `startAt`, and it is also
# pulled in whenever llama-cpp starts (llama-cpp declares `wants`/`after` on
# this unit).
systemd.services.update-qwen-model = {
  description = "Update Qwen3-Coder-Next model from HuggingFace";

  # The script downloads over the network. Because llama-cpp pulls this unit
  # in during boot, it could otherwise run before the network is usable and
  # fail immediately under `set -euo pipefail`.
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];

  serviceConfig = {
    Type = "oneshot";
    # NOTE(review): newer huggingface-hub releases ship the CLI as `hf`;
    # confirm `huggingface-cli` still exists in the pinned nixpkgs.
    ExecStart = "${pkgs.writeShellScript "update-qwen-model" ''
      set -euo pipefail

      MODEL_DIR="${cfg.configDir}/llama-cpp/models"
      MODEL_NAME="Qwen3-Coder-Next-Q4_0.gguf"
      REPO_ID="unsloth/Qwen3-Coder-Next-GGUF"

      # Create model directory if it doesn't exist
      mkdir -p "$MODEL_DIR"

      # Download the latest version of the model
      echo "Updating $MODEL_NAME from HuggingFace..."
      ${pkgs.python3Packages.huggingface-hub}/bin/huggingface-cli download \
        "$REPO_ID" \
        "$MODEL_NAME" \
        --local-dir "$MODEL_DIR"

      echo "Model updated successfully"
    ''}";
    # The download runs unprivileged; MODEL_DIR must be writable by this
    # user/group.
    User = "nix-apps";
    Group = "jallen-nas";
  };

  # Run daily at 3 AM
  startAt = "*-*-* 03:00:00";
};
|
||||
|
||||
# Order llama-cpp after the model refresh unit and pull that unit in when
# llama-cpp is started, so the model download is attempted first.
systemd.services.llama-cpp =
  let
    modelRefreshUnits = [ "update-qwen-model.service" ];
  in
  {
    wants = modelRefreshUnits;
    after = modelRefreshUnits;
  };
|
||||
};
|
||||
};
|
||||
in
|
||||
{
|
||||
imports = [ aiConfig ];
|
||||
|
||||
config = lib.mkIf cfg.enable {
|
||||
environment.systemPackages = with pkgs; [
|
||||
amdgpu_top
|
||||
python3Packages.huggingface-hub
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user