xtr temp

2026-03-25 22:24:19 -05:00
parent ab81e78b60
commit e119ffaabb
7 changed files with 1637 additions and 517 deletions
--- a/modules/home/desktop/plasma/default.nix
+++ b/modules/home/desktop/plasma/default.nix
@@ -15,6 +15,7 @@ in
    home.packages = with pkgs.kdePackages; [
      plasma-browser-integration
      kdeplasma-addons
+      kvantum
    ];

    programs.plasma = {
--- a/modules/nixos/hardware/npu/default.nix
+++ b/modules/nixos/hardware/npu/default.nix
@@ -0,0 +1,103 @@
+{
+  lib,
+  pkgs,
+  config,
+  namespace,
+  ...
+}:
+let
+  inherit (lib) mkEnableOption mkIf;
+  inherit (lib.${namespace}) mkBoolOpt;
+  cfg = config.${namespace}.hardware.npu;
+in
+{
+  # AMD XDNA 2 NPU support module.
+  #
+  # Enables the amdxdna kernel driver and installs the XRT userspace runtime
+  # (libxrt_coreutil + the XDNA shim plugin) built from packages/xrt.
+  #
+  # Prerequisites:
+  #   • Linux >= 6.14 (amdxdna in-tree; CachyOS kernels >= 6.14 qualify) AND
+  #     linux-firmware >= 20260221 for the NPU firmware blobs.
+  #   • AMD XDNA 2 NPU silicon (Strix Point, Strix Halo, Kraken Point,
+  #     Gorgon Point — Ryzen AI 300-series and later).
+  #
+  # What this module does:
+  #   1. Installs xrt (libxrt_coreutil, libxrt_driver_xdna, xrt-smi) from
+  #      the local flake package; optionally also fastflowlm system-wide.
+  #   2. Loads the amdxdna kernel driver and adds linux-firmware.
+  #   3. Raises the memlock limit (PAM + systemd default) for NPU DMA buffers.
+  #   4. Grants the render group access to the accel device nodes via udev.
+  #
+  # Usage (NixOS config):
+  #   ${namespace}.hardware.npu.enable = true;
+  #   # Enable FLM system-wide if you also run the lemonade service:
+  #   ${namespace}.hardware.npu.fastflowlm.enable = true;
+
+  options.${namespace}.hardware.npu = {
+    enable = mkEnableOption "AMD XDNA 2 NPU support (XRT + amdxdna driver)";
+
+    fastflowlm.enable = mkBoolOpt false ''
+      Install FastFlowLM (flm) system-wide.
+      FastFlowLM runs LLMs directly on the AMD XDNA 2 NPU.
+      Enable this when you also run the lemonade service with an NPU backend,
+      or want standalone `flm` access.
+    '';
+  };
+
+  config = mkIf cfg.enable {
+    assertions = [
+      {
+        assertion = pkgs.stdenv.hostPlatform.isx86_64;
+        message = "${namespace}.hardware.npu: AMD XDNA NPU support is only available on x86_64-linux.";
+      }
+    ];
+
+    # ── Kernel driver ──────────────────────────────────────────────────────
+    # amdxdna is in-tree since Linux 6.14.  On older kernels (e.g. CachyOS
+    # 6.12/6.13) this explicit load request picks up an out-of-tree module if
+    # one is installed; it is harmless when the driver is compiled into the kernel.
+    boot.kernelModules = [ "amdxdna" ];
+
+    # ── XRT userspace runtime ──────────────────────────────────────────────
+    environment.systemPackages = [
+      pkgs.${namespace}.xrt
+    ]
+    ++ lib.optional cfg.fastflowlm.enable pkgs.${namespace}.fastflowlm;
+
+    # ── Memlock limit ──────────────────────────────────────────────────────
+    # NPU workloads require locking large memory regions for DMA.
+    # Without unlimited memlock the NPU will refuse to allocate buffers.
+    security.pam.loginLimits = [
+      {
+        domain = "*";
+        type = "-";
+        item = "memlock";
+        value = "unlimited";
+      }
+    ];
+
+    # For system services (e.g. lemonade, fastflowlm) that run under systemd,
+    # the PAM limit above does not apply — they must set LimitMEMLOCK in their
+    # unit.  We set a system-wide default via systemd.settings so every service
+    # inherits unlimited memlock unless it explicitly overrides it.
+    systemd.settings.Manager.DefaultLimitMEMLOCK = "infinity";
+
+    # ── NPU device permissions ─────────────────────────────────────────────
+    # amdxdna exposes the NPU as /dev/accel/accel0 (DRM accelerator device).
+    # Add a udev rule so members of the "render" group can open it without root.
+    services.udev.extraRules = ''
+      # AMD XDNA 2 NPU — grant access to the render group
+      SUBSYSTEM=="accel", KERNEL=="accel*", GROUP="render", MODE="0660"
+    '';
+
+    # Ensure the render group exists.
+    users.groups.render = { };
+
+    # ── Firmware ──────────────────────────────────────────────────────────
+    # The NPU firmware blobs ship in linux-firmware >= 20260221.
+    # hardware.enableAllFirmware (set by modules/nixos/hardware/common) already
+    # pulls in the full firmware set; this entry is explicit belt-and-braces.
+    hardware.firmware = [ pkgs.linux-firmware ];
+  };
+}
--- a/modules/nixos/services/grafana/default.nix
+++ b/modules/nixos/services/grafana/default.nix
@@ -395,460 +395,463 @@ let
            # ---------------------------------------------------------------------------
            # Alerting provisioning
            # ---------------------------------------------------------------------------
-            alerting = {
-              # ── Contact points ──────────────────────────────────────────────────
-              # ntfy via the Grafana webhook contact point.  Grafana POSTs a JSON
-              # body; ntfy accepts any body as the message text.  We use the
-              # message template below to format it nicely.
-              #
-              # Credentials are injected via Grafana's $__env{} provider, which
-              # reads from the process environment.  The GRAFANA_NTFY_USER and
-              # GRAFANA_NTFY_PASSWORD variables are set via the SOPS-managed
-              # grafana.env EnvironmentFile on the grafana.service unit.
-              #
-              # Note: $__file{} only works in grafana.ini settings, not in
-              # provisioning YAML files — using it here causes a parse error.
-              contactPoints.path = pkgs.writeTextDir "contactPoints.yaml" ''
-                apiVersion: 1
-                contactPoints:
-                  - name: ntfy
-                    receivers:
-                      - uid: ntfy-webhook
-                        type: webhook
-                        disableResolveMessage: false
-                        settings:
-                          url: https://ntfy.mjallen.dev/grafana-alerts
-                          httpMethod: POST
-                          username: $__env{GRAFANA_NTFY_USER}
-                          password: $__env{GRAFANA_NTFY_PASSWORD}
-                          httpHeaders:
-                            Tags: "chart,bell"
-              '';
+            # TEMPORARILY DISABLED - template format incompatible with Grafana 12
+            /*
+              alerting = {
+                # ── Contact points ──────────────────────────────────────────────────
+                # ntfy via the Grafana webhook contact point.  Grafana POSTs a JSON
+                # body; ntfy accepts any body as the message text.  We use the
+                # message template below to format it nicely.
+                #
+                # Credentials are injected via Grafana's $__env{} provider, which
+                # reads from the process environment.  The GRAFANA_NTFY_USER and
+                # GRAFANA_NTFY_PASSWORD variables are set via the SOPS-managed
+                # grafana.env EnvironmentFile on the grafana.service unit.
+                #
+                # Note: $__file{} only works in grafana.ini settings, not in
+                # provisioning YAML files — using it here causes a parse error.
+                contactPoints.path = pkgs.writeTextDir "contactPoints.yaml" ''
+                  apiVersion: 1
+                  contactPoints:
+                    - name: ntfy
+                      receivers:
+                        - uid: ntfy-webhook
+                          type: webhook
+                          disableResolveMessage: false
+                          settings:
+                            url: https://ntfy.mjallen.dev/grafana-alerts
+                            httpMethod: POST
+                            username: $__env{GRAFANA_NTFY_USER}
+                            password: $__env{GRAFANA_NTFY_PASSWORD}
+                            httpHeaders:
+                              Tags: "chart,bell"
+                '';

-              # ── Notification message template ───────────────────────────────────
-              # Grafana sends the rendered template body as the POST body.
-              # ntfy treats the body as the message text.
-              templates.settings = {
-                apiVersion = 1;
-                templates = [
-                  {
-                    name = "ntfy_message";
-                    template = ''
-                      {{ define "ntfy_message" -}}
-                      {{ .CommonAnnotations.summary | default .GroupLabels.alertname }}
-                      {{ range .Alerts -}}
-                      Status:    {{ .Status | title }}
-                      Alert:     {{ .Labels.alertname }}
-                      Severity:  {{ .Labels.severity | default "unknown" }}
-                      Instance:  {{ .Labels.instance | default "unknown" }}
-                      {{ if .Annotations.description -}}
-                      Details:   {{ .Annotations.description }}
-                      {{ end -}}
-                      {{ end -}}
-                      {{ end }}
-                    '';
-                  }
-                ];
+                # ── Notification message template ───────────────────────────────────
+                # Grafana sends the rendered template body as the POST body.
+                # ntfy treats the body as the message text.
+                templates.settings = {
+                  apiVersion = 1;
+                  templates = [
+                    {
+                      name = "ntfy_message";
+                      template = ''
+                        {{ define "ntfy_message" -}}
+                        {{ .CommonAnnotations.summary | default .GroupLabels.alertname }}
+                        {{ range .Alerts -}}
+                        Status:    {{ .Status | title }}
+                        Alert:     {{ .Labels.alertname }}
+                        Severity:  {{ .Labels.severity | default "unknown" }}
+                        Instance:  {{ .Labels.instance | default "unknown" }}
+                        {{ if .Annotations.description -}}
+                        Details:   {{ .Annotations.description }}
+                        {{ end -}}
+                        {{ end -}}
+                        {{ end }}
+                      '';
+                    }
+                  ];
+                };
+
+                # ── Notification routing policy ─────────────────────────────────────
+                policies.settings = {
+                  apiVersion = 1;
+                  policies = [
+                    {
+                      receiver = "ntfy";
+                      group_by = [
+                        "alertname"
+                        "severity"
+                      ];
+                      group_wait = "30s";
+                      group_interval = "5m";
+                      repeat_interval = "4h";
+                      routes = [
+                        # Critical alerts: repeat every 1h, no grouping wait
+                        {
+                          receiver = "ntfy";
+                          matchers = [ "severity = critical" ];
+                          group_wait = "0s";
+                          repeat_interval = "1h";
+                        }
+                      ];
+                    }
+                  ];
+                };
+
+                # ── Alert rules ─────────────────────────────────────────────────────
+                rules.settings = {
+                  apiVersion = 1;
+                  groups = [
+                    {
+                      name = "nas-system";
+                      folder = "NAS Alerts";
+                      interval = "1m";
+                      rules = [
+                        # Disk usage > 85% warning, > 95% critical
+                        {
+                          uid = "nas-disk-warning";
+                          title = "Disk usage high";
+                          condition = "C";
+                          data = [
+                            {
+                              refId = "A";
+                              datasourceUid = "prometheus";
+                              model = {
+                                expr = ''
+                                  (
+                                    node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
+                                    - node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
+                                  )
+                                  / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
+                                  * 100
+                                '';
+                                intervalMs = 60000;
+                                maxDataPoints = 43200;
+                                refId = "A";
+                              };
+                            }
+                            {
+                              refId = "B";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "reduce";
+                                refId = "B";
+                                expression = "A";
+                                reducer = "last";
+                              };
+                            }
+                            {
+                              refId = "C";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "threshold";
+                                refId = "C";
+                                expression = "B";
+                                conditions = [
+                                  {
+                                    evaluator = {
+                                      type = "gt";
+                                      params = [ 85 ];
+                                    };
+                                  }
+                                ];
+                              };
+                            }
+                          ];
+                          noDataState = "NoData";
+                          execErrState = "Error";
+                          for = "5m";
+                          annotations = {
+                            summary = "Disk usage above 85%";
+                            description = "Filesystem {{ $labels.mountpoint }} is {{ $values.B | printf \"%.1f\" }}% full.";
+                          };
+                          labels = {
+                            severity = "warning";
+                          };
+                          isPaused = false;
+                        }
+
+                        # Memory usage > 90%
+                        {
+                          uid = "nas-memory-high";
+                          title = "Memory usage high";
+                          condition = "C";
+                          data = [
+                            {
+                              refId = "A";
+                              datasourceUid = "prometheus";
+                              model = {
+                                expr = ''
+                                  (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100
+                                '';
+                                intervalMs = 60000;
+                                maxDataPoints = 43200;
+                                refId = "A";
+                              };
+                            }
+                            {
+                              refId = "B";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "reduce";
+                                refId = "B";
+                                expression = "A";
+                                reducer = "last";
+                              };
+                            }
+                            {
+                              refId = "C";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "threshold";
+                                refId = "C";
+                                expression = "B";
+                                conditions = [
+                                  {
+                                    evaluator = {
+                                      type = "gt";
+                                      params = [ 90 ];
+                                    };
+                                  }
+                                ];
+                              };
+                            }
+                          ];
+                          noDataState = "NoData";
+                          execErrState = "Error";
+                          for = "5m";
+                          annotations = {
+                            summary = "Memory usage above 90%";
+                            description = "Memory usage is {{ $values.B | printf \"%.1f\" }}%.";
+                          };
+                          labels = {
+                            severity = "warning";
+                          };
+                          isPaused = false;
+                        }
+
+                        # CPU > 90% sustained for 10m
+                        {
+                          uid = "nas-cpu-high";
+                          title = "CPU usage sustained high";
+                          condition = "C";
+                          data = [
+                            {
+                              refId = "A";
+                              datasourceUid = "prometheus";
+                              model = {
+                                expr = ''
+                                  100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
+                                '';
+                                intervalMs = 60000;
+                                maxDataPoints = 43200;
+                                refId = "A";
+                              };
+                            }
+                            {
+                              refId = "B";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "reduce";
+                                refId = "B";
+                                expression = "A";
+                                reducer = "last";
+                              };
+                            }
+                            {
+                              refId = "C";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "threshold";
+                                refId = "C";
+                                expression = "B";
+                                conditions = [
+                                  {
+                                    evaluator = {
+                                      type = "gt";
+                                      params = [ 90 ];
+                                    };
+                                  }
+                                ];
+                              };
+                            }
+                          ];
+                          noDataState = "NoData";
+                          execErrState = "Error";
+                          for = "10m";
+                          annotations = {
+                            summary = "CPU sustained above 90%";
+                            description = "CPU usage has been above 90% for 10 minutes (currently {{ $values.B | printf \"%.1f\" }}%).";
+                          };
+                          labels = {
+                            severity = "warning";
+                          };
+                          isPaused = false;
+                        }
+
+                        # UPS on battery (network_ups_tools_ups_status == 0 means OB/on-battery)
+                        {
+                          uid = "nas-ups-onbatt";
+                          title = "UPS on battery";
+                          condition = "C";
+                          data = [
+                            {
+                              refId = "A";
+                              datasourceUid = "prometheus";
+                              model = {
+                                expr = "network_ups_tools_ups_status";
+                                intervalMs = 60000;
+                                maxDataPoints = 43200;
+                                refId = "A";
+                              };
+                            }
+                            {
+                              refId = "B";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "reduce";
+                                refId = "B";
+                                expression = "A";
+                                reducer = "last";
+                              };
+                            }
+                            {
+                              refId = "C";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "threshold";
+                                refId = "C";
+                                expression = "B";
+                                # status 0 = OB (on battery), 1 = OL (online)
+                                conditions = [
+                                  {
+                                    evaluator = {
+                                      type = "lt";
+                                      params = [ 1 ];
+                                    };
+                                  }
+                                ];
+                              };
+                            }
+                          ];
+                          noDataState = "NoData";
+                          execErrState = "Error";
+                          for = "1m";
+                          annotations = {
+                            summary = "UPS is running on battery";
+                            description = "Mains power failure detected. UPS battery charge: {{ with query \"network_ups_tools_battery_charge\" }}{{ . | first | value | printf \"%.0f\" }}%{{ end }}.";
+                          };
+                          labels = {
+                            severity = "critical";
+                          };
+                          isPaused = false;
+                        }
+
+                        # UPS battery charge < 30%
+                        {
+                          uid = "nas-ups-lowbatt";
+                          title = "UPS battery low";
+                          condition = "C";
+                          data = [
+                            {
+                              refId = "A";
+                              datasourceUid = "prometheus";
+                              model = {
+                                expr = "network_ups_tools_battery_charge";
+                                intervalMs = 60000;
+                                maxDataPoints = 43200;
+                                refId = "A";
+                              };
+                            }
+                            {
+                              refId = "B";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "reduce";
+                                refId = "B";
+                                expression = "A";
+                                reducer = "last";
+                              };
+                            }
+                            {
+                              refId = "C";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "threshold";
+                                refId = "C";
+                                expression = "B";
+                                conditions = [
+                                  {
+                                    evaluator = {
+                                      type = "lt";
+                                      params = [ 30 ];
+                                    };
+                                  }
+                                ];
+                              };
+                            }
+                          ];
+                          noDataState = "NoData";
+                          execErrState = "Error";
+                          for = "2m";
+                          annotations = {
+                            summary = "UPS battery charge below 30%";
+                            description = "UPS battery is at {{ $values.B | printf \"%.0f\" }}%. Shutdown may be imminent.";
+                          };
+                          labels = {
+                            severity = "critical";
+                          };
+                          isPaused = false;
+                        }
+
+                        # PostgreSQL not responding
+                        {
+                          uid = "nas-postgres-down";
+                          title = "PostgreSQL down";
+                          condition = "C";
+                          data = [
+                            {
+                              refId = "A";
+                              datasourceUid = "prometheus";
+                              model = {
+                                expr = "pg_up";
+                                intervalMs = 60000;
+                                maxDataPoints = 43200;
+                                refId = "A";
+                              };
+                            }
+                            {
+                              refId = "B";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "reduce";
+                                refId = "B";
+                                expression = "A";
+                                reducer = "last";
+                              };
+                            }
+                            {
+                              refId = "C";
+                              datasourceUid = "__expr__";
+                              model = {
+                                type = "threshold";
+                                refId = "C";
+                                expression = "B";
+                                conditions = [
+                                  {
+                                    evaluator = {
+                                      type = "lt";
+                                      params = [ 1 ];
+                                    };
+                                  }
+                                ];
+                              };
+                            }
+                          ];
+                          noDataState = "Alerting";
+                          execErrState = "Error";
+                          for = "2m";
+                          annotations = {
+                            summary = "PostgreSQL is down";
+                            description = "The PostgreSQL exporter reports pg_up=0. Database may be unavailable.";
+                          };
+                          labels = {
+                            severity = "critical";
+                          };
+                          isPaused = false;
+                        }
+                      ];
+                    }
+                  ];
+                };
+
+                muteTimings.settings = {
+                  apiVersion = 1;
+                  muteTimes = [ ];
+                };
              };
-
-              # ── Notification routing policy ─────────────────────────────────────
-              policies.settings = {
-                apiVersion = 1;
-                policies = [
-                  {
-                    receiver = "ntfy";
-                    group_by = [
-                      "alertname"
-                      "severity"
-                    ];
-                    group_wait = "30s";
-                    group_interval = "5m";
-                    repeat_interval = "4h";
-                    routes = [
-                      # Critical alerts: repeat every 1h, no grouping wait
-                      {
-                        receiver = "ntfy";
-                        matchers = [ "severity = critical" ];
-                        group_wait = "0s";
-                        repeat_interval = "1h";
-                      }
-                    ];
-                  }
-                ];
-              };
-
-              # ── Alert rules ─────────────────────────────────────────────────────
-              rules.settings = {
-                apiVersion = 1;
-                groups = [
-                  {
-                    name = "nas-system";
-                    folder = "NAS Alerts";
-                    interval = "1m";
-                    rules = [
-                      # Disk usage > 85% warning, > 95% critical
-                      {
-                        uid = "nas-disk-warning";
-                        title = "Disk usage high";
-                        condition = "C";
-                        data = [
-                          {
-                            refId = "A";
-                            datasourceUid = "prometheus";
-                            model = {
-                              expr = ''
-                                (
-                                  node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
-                                  - node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
-                                )
-                                / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
-                                * 100
-                              '';
-                              intervalMs = 60000;
-                              maxDataPoints = 43200;
-                              refId = "A";
-                            };
-                          }
-                          {
-                            refId = "B";
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "reduce";
-                              refId = "B";
-                              expression = "A";
-                              reducer = "last";
-                            };
-                          }
-                          {
-                            refId = "C";
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "threshold";
-                              refId = "C";
-                              expression = "B";
-                              conditions = [
-                                {
-                                  evaluator = {
-                                    type = "gt";
-                                    params = [ 85 ];
-                                  };
-                                }
-                              ];
-                            };
-                          }
-                        ];
-                        noDataState = "NoData";
-                        execErrState = "Error";
-                        for = "5m";
-                        annotations = {
-                          summary = "Disk usage above 85%";
-                          description = "Filesystem {{ $labels.mountpoint }} is {{ $values.B | printf \"%.1f\" }}% full.";
-                        };
-                        labels = {
-                          severity = "warning";
-                        };
-                        isPaused = false;
-                      }
-
-                      # Memory usage alert: warning when (1 - MemAvailable / MemTotal) * 100 is above 90.
-                      {
-                        uid = "nas-memory-high";
-                        title = "Memory usage high";
-                        condition = "C"; # refId of the threshold expression defined below
-                        data = [
-                          {
-                            refId = "A"; # A: Prometheus query producing used-memory percentage
-                            datasourceUid = "prometheus";
-                            model = {
-                              expr = ''
-                                (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100
-                              '';
-                              intervalMs = 60000;
-                              maxDataPoints = 43200;
-                              refId = "A";
-                            };
-                          }
-                          {
-                            refId = "B"; # B: reduces A with the "last" reducer
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "reduce";
-                              refId = "B";
-                              expression = "A";
-                              reducer = "last";
-                            };
-                          }
-                          {
-                            refId = "C"; # C: threshold on B, true when B > 90
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "threshold";
-                              refId = "C";
-                              expression = "B";
-                              conditions = [
-                                {
-                                  evaluator = {
-                                    type = "gt";
-                                    params = [ 90 ];
-                                  };
-                                }
-                              ];
-                            };
-                          }
-                        ];
-                        noDataState = "NoData";
-                        execErrState = "Error";
-                        for = "5m"; # presumably the condition must hold this long before the rule fires — confirm against Grafana docs
-                        annotations = {
-                          summary = "Memory usage above 90%";
-                          description = "Memory usage is {{ $values.B | printf \"%.1f\" }}%."; # renders the reduced value B with one decimal
-                        };
-                        labels = {
-                          severity = "warning";
-                        };
-                        isPaused = false;
-                      }
-
-                      # CPU alert: warning when non-idle CPU (100 - idle rate over 5m, averaged per instance) stays above 90 for 10m.
-                      {
-                        uid = "nas-cpu-high";
-                        title = "CPU usage sustained high";
-                        condition = "C"; # refId of the threshold expression defined below
-                        data = [
-                          {
-                            refId = "A"; # A: Prometheus query producing CPU busy percentage per instance
-                            datasourceUid = "prometheus";
-                            model = {
-                              expr = ''
-                                100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
-                              '';
-                              intervalMs = 60000;
-                              maxDataPoints = 43200;
-                              refId = "A";
-                            };
-                          }
-                          {
-                            refId = "B"; # B: reduces A with the "last" reducer
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "reduce";
-                              refId = "B";
-                              expression = "A";
-                              reducer = "last";
-                            };
-                          }
-                          {
-                            refId = "C"; # C: threshold on B, true when B > 90
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "threshold";
-                              refId = "C";
-                              expression = "B";
-                              conditions = [
-                                {
-                                  evaluator = {
-                                    type = "gt";
-                                    params = [ 90 ];
-                                  };
-                                }
-                              ];
-                            };
-                          }
-                        ];
-                        noDataState = "NoData";
-                        execErrState = "Error";
-                        for = "10m"; # the "sustained for 10 minutes" window named in the title and description
-                        annotations = {
-                          summary = "CPU sustained above 90%";
-                          description = "CPU usage has been above 90% for 10 minutes (currently {{ $values.B | printf \"%.1f\" }}%)."; # renders the reduced value B with one decimal
-                        };
-                        labels = {
-                          severity = "warning";
-                        };
-                        isPaused = false;
-                      }
-
-                      # UPS on battery: critical when the last network_ups_tools_ups_status sample is below 1 (rule assumes 0 = OB/on-battery — TODO confirm).
-                      {
-                        uid = "nas-ups-onbatt";
-                        title = "UPS on battery";
-                        condition = "C"; # refId of the threshold expression defined below
-                        data = [
-                          {
-                            refId = "A"; # A: Prometheus query for the raw UPS status metric (no label selector)
-                            datasourceUid = "prometheus";
-                            model = {
-                              expr = "network_ups_tools_ups_status";
-                              intervalMs = 60000;
-                              maxDataPoints = 43200;
-                              refId = "A";
-                            };
-                          }
-                          {
-                            refId = "B"; # B: reduces A with the "last" reducer
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "reduce";
-                              refId = "B";
-                              expression = "A";
-                              reducer = "last";
-                            };
-                          }
-                          {
-                            refId = "C"; # C: threshold on B, true when B < 1
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "threshold";
-                              refId = "C";
-                              expression = "B";
-                              # Assumes status 0 = OB (on battery), 1 = OL (online). NOTE(review): if the exporter emits one series per status flag, every unset flag would match "< 1" — verify and add a flag selector if so.
-                              conditions = [
-                                {
-                                  evaluator = {
-                                    type = "lt";
-                                    params = [ 1 ];
-                                  };
-                                }
-                              ];
-                            };
-                          }
-                        ];
-                        noDataState = "NoData";
-                        execErrState = "Error";
-                        for = "1m";
-                        annotations = {
-                          summary = "UPS is running on battery";
-                          description = "Mains power failure detected. UPS battery charge: {{ with query \"network_ups_tools_battery_charge\" }}{{ . | first | value | printf \"%.0f\" }}%{{ end }}."; # NOTE(review): relies on a `query` template function — confirm Grafana annotation templates provide it
-                        };
-                        labels = {
-                          severity = "critical";
-                        };
-                        isPaused = false;
-                      }
-
-                      # UPS battery low: critical when the last network_ups_tools_battery_charge sample is below 30.
-                      {
-                        uid = "nas-ups-lowbatt";
-                        title = "UPS battery low";
-                        condition = "C"; # refId of the threshold expression defined below
-                        data = [
-                          {
-                            refId = "A"; # A: Prometheus query for the battery charge metric
-                            datasourceUid = "prometheus";
-                            model = {
-                              expr = "network_ups_tools_battery_charge";
-                              intervalMs = 60000;
-                              maxDataPoints = 43200;
-                              refId = "A";
-                            };
-                          }
-                          {
-                            refId = "B"; # B: reduces A with the "last" reducer
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "reduce";
-                              refId = "B";
-                              expression = "A";
-                              reducer = "last";
-                            };
-                          }
-                          {
-                            refId = "C"; # C: threshold on B, true when B < 30
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "threshold";
-                              refId = "C";
-                              expression = "B";
-                              conditions = [
-                                {
-                                  evaluator = {
-                                    type = "lt";
-                                    params = [ 30 ];
-                                  };
-                                }
-                              ];
-                            };
-                          }
-                        ];
-                        noDataState = "NoData";
-                        execErrState = "Error";
-                        for = "2m";
-                        annotations = {
-                          summary = "UPS battery charge below 30%";
-                          description = "UPS battery is at {{ $values.B | printf \"%.0f\" }}%. Shutdown may be imminent."; # renders the reduced value B with no decimals
-                        };
-                        labels = {
-                          severity = "critical";
-                        };
-                        isPaused = false;
-                      }
-
-                      # PostgreSQL down: critical when the last pg_up sample is below 1, or when the query returns no data.
-                      {
-                        uid = "nas-postgres-down";
-                        title = "PostgreSQL down";
-                        condition = "C"; # refId of the threshold expression defined below
-                        data = [
-                          {
-                            refId = "A"; # A: Prometheus query for the pg_up metric
-                            datasourceUid = "prometheus";
-                            model = {
-                              expr = "pg_up";
-                              intervalMs = 60000;
-                              maxDataPoints = 43200;
-                              refId = "A";
-                            };
-                          }
-                          {
-                            refId = "B"; # B: reduces A with the "last" reducer
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "reduce";
-                              refId = "B";
-                              expression = "A";
-                              reducer = "last";
-                            };
-                          }
-                          {
-                            refId = "C"; # C: threshold on B, true when B < 1
-                            datasourceUid = "__expr__";
-                            model = {
-                              type = "threshold";
-                              refId = "C";
-                              expression = "B";
-                              conditions = [
-                                {
-                                  evaluator = {
-                                    type = "lt";
-                                    params = [ 1 ];
-                                  };
-                                }
-                              ];
-                            };
-                          }
-                        ];
-                        noDataState = "Alerting"; # missing data is set to "Alerting" here rather than "NoData"
-                        execErrState = "Error";
-                        for = "2m";
-                        annotations = {
-                          summary = "PostgreSQL is down";
-                          description = "The PostgreSQL exporter reports pg_up=0. Database may be unavailable.";
-                        };
-                        labels = {
-                          severity = "critical";
-                        };
-                        isPaused = false;
-                      }
-                    ];
-                  }
-                ];
-              };
-
-              muteTimings.settings = { # mute-timing provisioning settings
-                apiVersion = 1;
-                muteTimes = [ ]; # empty list: no mute timings are defined
-              };
-            };
+            */

            dashboards.settings.providers = [
              {