cyd

2026-04-14 16:12:54 -05:00
parent c3abeb846d
commit 74b1825d4d
10 changed files with 2429 additions and 35 deletions
--- a/modules/nixos/services/ai/default.nix
+++ b/modules/nixos/services/ai/default.nix
@@ -13,14 +13,14 @@ let

  cfg = config.${namespace}.services.ai;

-  ntfyModelFailScript = pkgs.writeShellScript "update-qwen-model-notify-failure" ''
+  ntfyModelFailScript = pkgs.writeShellScript "update-gemma-model-notify-failure" ''
    HOST="$(${pkgs.hostname}/bin/hostname)"
    ${pkgs.curl}/bin/curl -sf \
      --user "$NTFY_USER:$NTFY_PASSWORD" \
-      -H "Title: Qwen model update FAILED on $HOST" \
+      -H "Title: Gemma model update FAILED on $HOST" \
      -H "Priority: high" \
      -H "Tags: rotating_light,robot_face" \
-      -d "The daily update-qwen-model job failed. Check: journalctl -u update-qwen-model.service" \
+      -d "The daily update-gemma-model job failed. Check: journalctl -u update-gemma-model.service" \
      "https://ntfy.mjallen.dev/builds" || true
  '';

@@ -30,10 +30,7 @@ let
    description = "AI Services";
    options = {
      llama-cpp = {
-        model =
-          mkOpt types.str
-            "models--unsloth--gemma-4-26B-A4B-it-GGUF/snapshots/b8654b48d979f2853b7a81d6541ca64eea7dc3c5/gemma-4-26B-A4B-it-UD-Q8_K_XL"
-            "";
+        model = mkOpt types.str "gemma-4-26B-A4B-it-UD-Q8_K_XL" "";
      };
    };
    moduleConfig = {
@@ -59,21 +56,17 @@ let
          model = "${cfg.configDir}/llama-cpp/models/${cfg.llama-cpp.model}.gguf";
          package = inputs.llama-cpp.packages.${system}.rocm;
          extraFlags = [
-            "--fit"
-            "on"
-            "--seed"
-            "3407"
-            "--temp"
-            "0.7"
-            "--top-p"
-            "0.9"
-            "--min-p"
-            "0.05"
-            "--top-k"
-            "30"
            "--jinja"
+            "--chat-template-kwargs"
+            "{\"enable_thinking\":true}"
+            "--temp"
+            "1.0"
+            "--top-p"
+            "0.95"
+            "--top-k"
+            "64"
            "--ctx-size"
-            "131072"
+            "32768"
            "--threads"
            "8"
            "--batch-size"
@@ -81,7 +74,7 @@ let
            "--gpu-layers"
            "999"
            "--flash-attn"
-            "auto"
+            "on"
            "--mlock"
          ];
        };
@@ -120,16 +113,16 @@ let
      # Systemd service for automatic model updates
      systemd = {
        services = {
-          update-qwen-model = {
-            description = "Update Qwen3-Coder-Next model from HuggingFace";
+          update-gemma-model = {
+            description = "Update Gemma 4 model from HuggingFace";
            serviceConfig = {
              Type = "oneshot";
-              ExecStart = "${pkgs.writeShellScript "update-qwen-model" ''
+              ExecStart = "${pkgs.writeShellScript "update-gemma-model" ''
                set -euo pipefail

                MODEL_DIR="${cfg.configDir}/llama-cpp/models"
                MODEL_NAME="${cfg.llama-cpp.model}.gguf"
-                REPO_ID="unsloth/Qwen3-Coder-Next-GGUF"
+                REPO_ID="unsloth/gemma-4-26B-A4B-it-GGUF"

                # Create model directory if it doesn't exist
                mkdir -p "$MODEL_DIR"
@@ -147,13 +140,13 @@ let
              Group = "jallen-nas";
              EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
            };
-            unitConfig.OnFailure = "update-qwen-model-notify-failure.service";
+            unitConfig.OnFailure = "update-gemma-model-notify-failure.service";
            # Run daily at 3 AM
            startAt = "*-*-* 03:00:00";
          };

-          update-qwen-model-notify-failure = {
-            description = "Notify ntfy on update-qwen-model failure";
+          update-gemma-model-notify-failure = {
+            description = "Notify ntfy on update-gemma-model failure";
            serviceConfig = {
              Type = "oneshot";
              ExecStart = "${ntfyModelFailScript}";
@@ -163,8 +156,8 @@ let

          # Ensure model is available before llama-cpp starts
          llama-cpp = {
-            after = [ "update-qwen-model.service" ];
-            wants = [ "update-qwen-model.service" ];
+            after = [ "update-gemma-model.service" ];
+            wants = [ "update-gemma-model.service" ];
          };
        };
      };