# nix-config/modules/nixos/services/ai/default.nix

{
  config,
  lib,
  pkgs,
  inputs,
  system,
  namespace,
  ...
}:
with lib;
let
  # Option-declaration helper taken from this flake's own lib namespace.
  inherit (lib.${namespace}) mkOpt;

  # Shorthand for this module's option values (config.<namespace>.services.ai).
  cfg = config.${namespace}.services.ai;

  # Best-effort failure notifier: POSTs a high-priority message naming this host
  # to the ntfy "builds" topic. Credentials come from the NTFY_USER and
  # NTFY_PASSWORD environment variables, which the unit running this script must
  # supply. The trailing `|| true` keeps the exit status at 0 even when the POST
  # fails, so an ntfy outage never marks the notifier itself as failed.
  ntfyModelFailScript = pkgs.writeShellScript "update-qwen-model-notify-failure" ''
    HOST="$(${pkgs.hostname}/bin/hostname)"
    # Bound the request: without these limits a stalled connection would keep
    # curl, and the oneshot unit running it, waiting indefinitely.
    ${pkgs.curl}/bin/curl -sf \
      --connect-timeout 10 \
      --max-time 30 \
      --user "$NTFY_USER:$NTFY_PASSWORD" \
      -H "Title: Qwen model update FAILED on $HOST" \
      -H "Priority: high" \
      -H "Tags: rotating_light,robot_face" \
      -d "The daily update-qwen-model job failed. Check: journalctl -u update-qwen-model.service" \
      "https://ntfy.mjallen.dev/builds" || true
  '';

  aiConfig = lib.${namespace}.mkModule {
    inherit config;
    name = "ai";
    description = "AI Services";
    options = {
      # Model file served by llama-cpp, given relative to
      # <configDir>/llama-cpp/models and WITHOUT the ".gguf" extension: both
      # the llama-cpp service and the update job append ".gguf" themselves.
      llama-cpp.model =
        mkOpt types.str
          "models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/b8654b48d979f2853b7a81d6541ca64eea7dc3c5/gemma-4-26B-A4B-it-UD-Q8_K_XL"
          "";
    };
    moduleConfig = {
      services = {
        # Ollama API server, using the ROCm package.
        ollama = {
          enable = true;
          package = pkgs.ollama-rocm;

          # Network: bind every interface; firewall exposure follows the
          # module-wide openFirewall setting.
          host = "0.0.0.0";
          port = 11434;
          inherit (cfg) openFirewall;

          # Service account and state directory.
          user = "nix-apps";
          group = "jallen-nas";
          home = "${cfg.configDir}/ollama";

          # NOTE(review): presumably pins the GFX target ROCm should assume for
          # this GPU — confirm the value matches the installed hardware.
          rocmOverrideGfx = "11.0.2";

          # No models are requested declaratively.
          loadModels = [ ];
        };

        # llama.cpp server, using the ROCm build from the llama-cpp flake input.
        llama-cpp = {
          enable = true;
          package = inputs.llama-cpp.packages.${system}.rocm;

          # Network: bind every interface; firewall exposure follows the
          # module-wide openFirewall setting.
          host = "0.0.0.0";
          port = 8127;
          inherit (cfg) openFirewall;

          # The option value carries no extension; ".gguf" is appended here.
          model = "${cfg.configDir}/llama-cpp/models/${cfg.llama-cpp.model}.gguf";

          # Extra command-line arguments; a flag that takes a value is
          # immediately followed by that value, so order matters.
          extraFlags = [
            "--fit"
            "on"

            # Sampling settings.
            "--seed"
            "3407"
            "--temp"
            "0.7"
            "--top-p"
            "0.9"
            "--min-p"
            "0.05"
            "--top-k"
            "30"

            "--jinja"

            # Context window, CPU threads and batch size.
            "--ctx-size"
            "131072"
            "--threads"
            "8"
            "--batch-size"
            "512"

            # GPU offload, flash attention and memory locking.
            "--gpu-layers"
            "999"
            "--flash-attn"
            "auto"
            "--mlock"
          ];
        };

        # Open WebUI front end.
        open-webui = {
          enable = true;
          package = pkgs.open-webui;

          # Network: bind every interface; firewall exposure follows the
          # module-wide openFirewall setting.
          host = "0.0.0.0";
          port = 8888;
          inherit (cfg) openFirewall;

          # Additional environment loaded from a sops-managed secret file.
          environmentFile = config.sops.secrets."jallen-nas/open-webui".path;

          environment = {
            # --- OIDC login via authentik -----------------------------------
            OAUTH_PROVIDER_NAME = "authentik";
            OPENID_PROVIDER_URL = "https://authentik.mjallen.dev/application/o/chat/.well-known/openid-configuration";
            OPENID_REDIRECT_URI = "https://chat.mjallen.dev/oauth/oidc/callback";
            OAUTH_MERGE_ACCOUNTS_BY_EMAIL = "True";

            # --- Sign-up and local login switched off -----------------------
            ENABLE_OAUTH_SIGNUP = "False";
            ENABLE_SIGNUP = "False";
            ENABLE_LOGIN_FORM = "False";
            # NOTE(review): WEBUI_AUTH=False looks at odds with the OIDC-only
            # login configured above, on a service bound to 0.0.0.0 — confirm
            # against the Open WebUI documentation that this is intended.
            WEBUI_AUTH = "False";

            # --- Telemetry opt-outs -----------------------------------------
            ANONYMIZED_TELEMETRY = "False";
            DO_NOT_TRACK = "True";
            SCARF_NO_ANALYTICS = "True";

            # --- Backend ----------------------------------------------------
            # Points at the local Ollama port configured in this module.
            # NOTE(review): current Open WebUI docs name this OLLAMA_BASE_URL —
            # confirm the legacy variable name is still honoured.
            OLLAMA_API_BASE_URL = "http://127.0.0.1:11434";
            LOCAL_FILES_ONLY = "False";
          };
        };
      };

      # Command-line tools installed system-wide: amdgpu_top and the
      # HuggingFace Hub client (the same package the model update job runs).
      environment.systemPackages = [
        pkgs.amdgpu_top
        pkgs.python3Packages.huggingface-hub
      ];

      # Systemd service for automatic model updates
      systemd = {
        services = {
          # Oneshot job, run daily at 03:00, that downloads the GGUF file
          # llama-cpp is configured to serve. On failure it triggers
          # update-qwen-model-notify-failure.service.
          #
          # Two shapes of cfg.llama-cpp.model are supported:
          #
          #   * HuggingFace hub-cache layout
          #       models--<org>--<repo>/snapshots/<revision>/<file>
          #     The job downloads <file>.gguf from <org>/<repo> at <revision>
          #     into a hub cache rooted at the models directory, which recreates
          #     exactly the path the llama-cpp service reads.
          #
          #   * Anything else (legacy behaviour, unchanged)
          #     The job downloads <model>.gguf from the Qwen3-Coder-Next
          #     repository with --local-dir.
          #
          # Previously the repository was always the Qwen one, so a hub-cache
          # style model value (the current default) asked that repository for a
          # file it does not contain and the job failed on every run.
          update-qwen-model =
            let
              modelDir = "${cfg.configDir}/llama-cpp/models";

              # [ org repo revision file ] for hub-cache values, null otherwise.
              hubCacheParts =
                builtins.match "models--([^/]+)--([^/]+)/snapshots/([^/]+)/(.+)"
                  cfg.llama-cpp.model;
              fromHubCache = hubCacheParts != null;
              # Only evaluated on the hub-cache branches below.
              part = index: builtins.elemAt hubCacheParts index;

              repoId = if fromHubCache then "${part 0}/${part 1}" else "unsloth/Qwen3-Coder-Next-GGUF";
              fileName = if fromHubCache then "${part 3}.gguf" else "${cfg.llama-cpp.model}.gguf";

              # Destination arguments for `hf download`; $MODEL_DIR is expanded
              # by the shell at run time, not by Nix.
              targetArgs =
                if fromHubCache then
                  "--revision ${lib.escapeShellArg (part 2)} --cache-dir \"$MODEL_DIR\""
                else
                  "--local-dir \"$MODEL_DIR\"";

              updateScript = pkgs.writeShellScript "update-qwen-model" ''
                set -euo pipefail

                MODEL_DIR=${lib.escapeShellArg modelDir}
                MODEL_NAME=${lib.escapeShellArg fileName}
                REPO_ID=${lib.escapeShellArg repoId}

                # Create model directory if it doesn't exist
                mkdir -p "$MODEL_DIR"

                # Download (or refresh) the configured model file
                echo "Updating $MODEL_NAME from HuggingFace..."
                ${pkgs.python3Packages.huggingface-hub}/bin/hf download \
                  "$REPO_ID" \
                  "$MODEL_NAME" \
                  ${targetArgs}

                echo "Model updated successfully"
              '';
            in
            {
              description = "Update llama-cpp model (${repoId}) from HuggingFace";
              serviceConfig = {
                Type = "oneshot";
                ExecStart = "${updateScript}";
                User = "nix-apps";
                Group = "jallen-nas";
                EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
              };
              unitConfig.OnFailure = "update-qwen-model-notify-failure.service";
              # Run daily at 3 AM
              startAt = "*-*-* 03:00:00";
            };

          # Oneshot unit that runs the ntfy notification script. The ntfy
          # credentials are supplied through the sops-rendered "ntfy.env" file.
          update-qwen-model-notify-failure = {
            description = "Notify ntfy on update-qwen-model failure";
            serviceConfig.Type = "oneshot";
            serviceConfig.EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
            serviceConfig.ExecStart = "${ntfyModelFailScript}";
          };

          # Start the model download job together with llama-cpp and order
          # llama-cpp after it. `wants` is a weak dependency, so llama-cpp is
          # still started when the download job fails.
          llama-cpp =
            let
              modelJob = [ "update-qwen-model.service" ];
            in
            {
              wants = modelJob;
              after = modelJob;
            };
        };
      };
    };
  };
in
{
  # This file contributes nothing directly; everything comes from the module
  # value produced by mkModule and bound to aiConfig.
  imports = [ aiConfig ];
}