{ config, lib, pkgs, inputs, system, namespace, ... }:
with lib;
let
  inherit (lib.${namespace}) mkOpt;

  cfg = config.${namespace}.services.ai;

  # Sends a high-priority ntfy notification when the model update job fails.
  ntfyModelFailScript = pkgs.writeShellScript "update-qwen-model-notify-failure" ''
    HOST="$(${pkgs.hostname}/bin/hostname)"
    ${pkgs.curl}/bin/curl -sf \
      --user "$NTFY_USER:$NTFY_PASSWORD" \
      -H "Title: Qwen model update FAILED on $HOST" \
      -H "Priority: high" \
      -H "Tags: rotating_light,robot_face" \
      -d "The daily update-qwen-model job failed. Check: journalctl -u update-qwen-model.service" \
      "https://ntfy.mjallen.dev/builds" || true
  '';

  aiConfig = lib.${namespace}.mkModule {
    inherit config;
    name = "ai";
    description = "AI Services";

    options = {
      llama-cpp = {
        model = mkOpt types.str
          "models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/b8654b48d979f2853b7a81d6541ca64eea7dc3c5/gemma-4-26B-A4B-it-UD-Q8_K_XL"
          "Path of the GGUF model relative to the llama-cpp models directory, without the .gguf extension.";
      };
    };

    moduleConfig = {
      services = {
        ollama = {
          enable = true;
          package = pkgs.ollama-rocm;
          port = 11434;
          host = "0.0.0.0";
          user = "nix-apps";
          group = "jallen-nas";
          openFirewall = cfg.openFirewall;
          rocmOverrideGfx = "11.0.2";
          loadModels = [ ];
          home = "${cfg.configDir}/ollama";
        };

        llama-cpp = {
          enable = true;
          port = 8127;
          host = "0.0.0.0";
          openFirewall = cfg.openFirewall;
          model = "${cfg.configDir}/llama-cpp/models/${cfg.llama-cpp.model}.gguf";
          package = inputs.llama-cpp.packages.${system}.rocm;
          extraFlags = [
            "--fit" "on"
            "--seed" "3407"
            "--temp" "0.7"
            "--top-p" "0.9"
            "--min-p" "0.05"
            "--top-k" "30"
            "--jinja"
            "--ctx-size" "131072"
            "--threads" "8"
            "--batch-size" "512"
            "--gpu-layers" "999"
            "--flash-attn" "auto"
            "--mlock"
          ];
        };

        open-webui = {
          enable = true;
          package = pkgs.open-webui;
          host = "0.0.0.0";
          port = 8888;
          openFirewall = cfg.openFirewall;
          environmentFile = config.sops.secrets."jallen-nas/open-webui".path;
          environment = {
            OPENID_PROVIDER_URL = "https://authentik.mjallen.dev/application/o/chat/.well-known/openid-configuration";
            OAUTH_PROVIDER_NAME = "authentik";
            OPENID_REDIRECT_URI = "https://chat.mjallen.dev/oauth/oidc/callback";
            ENABLE_OAUTH_SIGNUP = "False";
            OAUTH_MERGE_ACCOUNTS_BY_EMAIL = "True";
            ENABLE_SIGNUP = "False";
            ENABLE_LOGIN_FORM = "False";
            ANONYMIZED_TELEMETRY = "False";
            DO_NOT_TRACK = "True";
            SCARF_NO_ANALYTICS = "True";
            OLLAMA_API_BASE_URL = "http://127.0.0.1:11434";
            LOCAL_FILES_ONLY = "False";
            WEBUI_AUTH = "False";
          };
        };
      };

      # GPU monitoring plus the HuggingFace Hub CLI used by the model update service below
      environment.systemPackages = with pkgs; [
        amdgpu_top
        python3Packages.huggingface-hub
      ];

      # Systemd service for automatic model updates
      systemd = {
        services = {
          update-qwen-model = {
            description = "Update Qwen3-Coder-Next model from HuggingFace";
            serviceConfig = {
              Type = "oneshot";
              ExecStart = "${pkgs.writeShellScript "update-qwen-model" ''
                set -euo pipefail

                MODEL_DIR="${cfg.configDir}/llama-cpp/models"
                MODEL_NAME="${cfg.llama-cpp.model}.gguf"
                REPO_ID="unsloth/Qwen3-Coder-Next-GGUF"

                # Create model directory if it doesn't exist
                mkdir -p "$MODEL_DIR"

                # Download the latest version of the model
                echo "Updating $MODEL_NAME from HuggingFace..."
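                # `hf download <repo> <file>` fetches a single file from the Hub
                # and writes it under --local-dir at its repo-relative path, so
                # $MODEL_NAME must match the filename layout inside $REPO_ID.
                # Note: the `hf` entry point ships with recent huggingface-hub
                # releases; older versions expose the same command as
                # `huggingface-cli download`.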
                ${pkgs.python3Packages.huggingface-hub}/bin/hf download \
                  "$REPO_ID" \
                  "$MODEL_NAME" \
                  --local-dir "$MODEL_DIR"

                echo "Model updated successfully"
              ''}";
              User = "nix-apps";
              Group = "jallen-nas";
              EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
            };
            unitConfig.OnFailure = "update-qwen-model-notify-failure.service";
            # Run daily at 3 AM
            startAt = "*-*-* 03:00:00";
          };

          update-qwen-model-notify-failure = {
            description = "Notify ntfy on update-qwen-model failure";
            serviceConfig = {
              Type = "oneshot";
              ExecStart = "${ntfyModelFailScript}";
              EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
            };
          };

          # Ensure model is available before llama-cpp starts
          llama-cpp = {
            after = [ "update-qwen-model.service" ];
            wants = [ "update-qwen-model.service" ];
          };
        };
      };
    };
  };
in
{
  imports = [ aiConfig ];
}
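# Example host usage (a sketch, not verbatim from this repo: `mynamespace` and the
# model path are placeholders, and it assumes `lib.${namespace}.mkModule` wires up
# the `enable` option while `openFirewall` and `configDir` are declared elsewhere
# in this namespace, since `cfg` references them above):
#
#   {
#     mynamespace.services.ai = {
#       enable = true;
#       llama-cpp.model = "my-model-dir/my-model-file";
#     };
#   }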