diff --git a/modules/nixos/services/grafana/default.nix b/modules/nixos/services/grafana/default.nix
index 46c16fe..ddc5cee 100755
--- a/modules/nixos/services/grafana/default.nix
+++ b/modules/nixos/services/grafana/default.nix
@@ -12,47 +12,110 @@ let
 
   # ---------------------------------------------------------------------------
   # Community dashboards — fetched at build time, pinned by hash.
+  #
+  # Community dashboards use __inputs with a template variable (e.g.
+  # ${DS_PROM} or ${DS_PROMETHEUS}) for the datasource UID. When provisioned
+  # via file Grafana never substitutes those, so every panel is datasource-
+  # broken. We patch each file at eval time: replace all occurrences of the
+  # template variable with our fixed datasource UID "prometheus", and strip
+  # __inputs/__requires so Grafana doesn't treat the file as an import.
   # ---------------------------------------------------------------------------
+  # patchDashboard name src dsVar: patch a community dashboard at eval time.
+  #   name  — store file name for the result (passed to pkgs.writeText)
+  #   src   — path of the downloaded dashboard JSON (a pkgs.fetchurl output here)
+  #   dsVar — datasource template variable name, without the ${} wrapper
+  # Steps: parse with builtins.fromJSON, drop __inputs/__requires (import-only
+  # metadata), re-serialise, replace every ${dsVar} with the fixed UID
+  # "prometheus" via builtins.replaceStrings, then write the result to the store.
+  patchDashboard =
+    name: src: dsVar:
+    let
+      raw = builtins.readFile src;
+      d = builtins.fromJSON raw;
+      # Strip import metadata then re-serialise
+      stripped = builtins.toJSON (
+        builtins.removeAttrs d [
+          "__inputs"
+          "__requires"
+        ]
+      );
+      # Replace the template variable (e.g. "${DS_PROMETHEUS}") with our UID. The
+      # needle is concatenated around an escaped \${ so Nix never interpolates it.
+      patched = builtins.replaceStrings [ ("\${" + dsVar + "}") ] [ "prometheus" ] stripped;
+    in
+    pkgs.writeText name patched;
+
   communityDashboards = pkgs.linkFarm "grafana-community-dashboards" [
     {
       # Node Exporter Full — https://grafana.com/grafana/dashboards/1860
+      # Uses ${ds_prometheus} (lowercase)
       name = "node-exporter-full.json";
-      path = pkgs.fetchurl {
+      path = patchDashboard "node-exporter-full.json" (pkgs.fetchurl {
         url = "https://grafana.com/api/dashboards/1860/revisions/latest/download";
         sha256 = "sha256-pNgn6xgZBEu6LW0lc0cXX2gRkQ8lg/rer34SPE3yEl4=";
-      };
+      }) "ds_prometheus";
     }
     {
       # PostgreSQL Database — https://grafana.com/grafana/dashboards/9628
       name = "postgresql.json";
-      path = pkgs.fetchurl {
+      path = patchDashboard "postgresql.json" (pkgs.fetchurl {
         url = "https://grafana.com/api/dashboards/9628/revisions/latest/download";
         sha256 = "sha256-UhusNAZbyt7fJV/DhFUK4FKOmnTpG0R15YO2r+nDnMc=";
-      };
+      }) "DS_PROMETHEUS";
     }
     {
       # Redis Dashboard for prometheus-redis-exporter 1.x — https://grafana.com/grafana/dashboards/763
+      # Uses DS_PROM; also patches out the 'namespace' template variable
+      # since our metrics have no namespace label — all done in pure Nix.
       name = "redis.json";
-      path = pkgs.fetchurl {
-        url = "https://grafana.com/api/dashboards/763/revisions/latest/download";
-        sha256 = "sha256-pThz+zHjcTT9vf8fpUuZK/ejNnH9GwEZVXOY27c9Aw8=";
-      };
+      path =
+        let
+          src = pkgs.fetchurl {
+            url = "https://grafana.com/api/dashboards/763/revisions/latest/download";
+            sha256 = "sha256-pThz+zHjcTT9vf8fpUuZK/ejNnH9GwEZVXOY27c9Aw8=";
+          };
+          raw = builtins.readFile src;
+          d = builtins.removeAttrs (builtins.fromJSON raw) [
+            "__inputs"
+            "__requires"
+          ];
+          # Drop the 'namespace' variable; make 'instance' query redis_up's instance label directly.
+          fixedTemplating = d // {
+            templating = d.templating // {
+              list = map (
+                v:
+                if v.name == "instance" then
+                  v
+                  // {
+                    query = "label_values(redis_up, instance)";
+                    definition = "label_values(redis_up, instance)";
+                  }
+                else
+                  v
+              ) (builtins.filter (v: v.name != "namespace") d.templating.list);
+            };
+          };
+          patched = builtins.replaceStrings [ ("\${" + "DS_PROM" + "}") ] [ "prometheus" ] (
+            builtins.toJSON fixedTemplating
+          );
+        in
+        pkgs.writeText "redis.json" patched;
     }
     {
       # MySQL Overview — https://grafana.com/grafana/dashboards/7362
       name = "mysql.json";
-      path = pkgs.fetchurl {
+      path = patchDashboard "mysql.json" (pkgs.fetchurl {
         url = "https://grafana.com/api/dashboards/7362/revisions/latest/download";
         sha256 = "sha256-WW7g60KY20XAdyUpumA0hBrjFC9MQGuGjiJKUhSVBXI=";
-      };
+      }) "DS_PROMETHEUS";
     }
     {
       # Nextcloud — https://grafana.com/grafana/dashboards/9632
       name = "nextcloud.json";
-      path = pkgs.fetchurl {
+      path = patchDashboard "nextcloud.json" (pkgs.fetchurl {
         url = "https://grafana.com/api/dashboards/9632/revisions/latest/download";
         sha256 = "sha256-Z28Q/sMg3jxglkszAs83IpL8f4p9loNnTQzjc3S/SAQ=";
-      };
+      }) "DS_PROMETHEUS";
     }
   ];
 
@@ -179,6 +242,8 @@ let
           # ── UPS (NUT) ────────────────────────────────────────────────────────
           {
             job_name = "nut";
+            # DRuggeri's nut_exporter serves UPS metrics at /ups_metrics, not /metrics.
+            metrics_path = "/ups_metrics";
             static_configs = [
               {
                 targets = [ "localhost:${toString config.services.prometheus.exporters.nut.port}" ];
@@ -290,20 +355,70 @@ let
             # Grafana's built-in file provider.
             secret_key = "$__file{${config.sops.secrets."jallen-nas/grafana/secret-key".path}}";
           };
+          # Grafana 12 enables kubernetesDashboards by default, which uses a
+          # new storage backend that validates datasource refs in dashboard
+          # files concurrently with datasource provisioning, causing a race
+          # that always fails on a clean install. Disable it to use the
+          # classic file provisioner that tolerates missing datasource refs.
+          "feature_toggles" = {
+            kubernetesDashboards = false;
+          };
+
+          # Grafana 12 introduced permitted_provisioning_paths as a security
+          # allowlist. The NixOS module stores all provisioning files in the
+          # Nix store, which is not in the default allowlist, causing the
+          # provisioner to silently refuse to load any files and then error
+          # with "data source not found".
+          paths.permitted_provisioning_paths = "/nix/store";
         };
 
         dataDir = "${cfg.configDir}/grafana";
 
         provision = {
           enable = true;
-          datasources.settings.datasources = [
-            {
-              name = "Prometheus";
-              type = "prometheus";
-              access = "proxy";
-              url = "http://localhost:${toString config.services.prometheus.port}";
-            }
-          ];
+          # Use path instead of settings to avoid the NixOS serializer
+          # writing `secureJsonData: null` which Grafana 12 chokes on.
+          datasources.path = pkgs.writeTextDir "datasource.yaml" ''
+            apiVersion: 1
+            datasources:
+              - name: Prometheus
+                uid: prometheus
+                type: prometheus
+                access: proxy
+                orgId: 1
+                url: http://localhost:${toString config.services.prometheus.port}
+                editable: false
+                jsonData:
+                  httpMethod: POST
+                  timeInterval: 15s
+          '';
+          # Provide empty-but-valid alerting provisioning documents.
+          # Without these, the NixOS module serialises `null` YAML which
+          # Grafana 12's provisioner fails to parse, producing a spurious
+          # "data source not found" error at startup.
+          alerting = {
+            rules.settings = {
+              apiVersion = 1;
+              groups = [ ];
+            };
+            contactPoints.settings = {
+              apiVersion = 1;
+              contactPoints = [ ];
+            };
+            policies.settings = {
+              apiVersion = 1;
+              policies = [ ];
+            };
+            templates.settings = {
+              apiVersion = 1;
+              templates = [ ];
+            };
+            muteTimings.settings = {
+              apiVersion = 1;
+              muteTimes = [ ];
+            };
+          };
+
           dashboards.settings.providers = [
             {
               name = "community";
diff --git a/systems/x86_64-linux/jallen-nas/users.nix b/systems/x86_64-linux/jallen-nas/users.nix
index 56e2e73..771eec0 100755
--- a/systems/x86_64-linux/jallen-nas/users.nix
+++ b/systems/x86_64-linux/jallen-nas/users.nix
@@ -52,6 +52,11 @@ in
         group = "nextcloud-exporter";
         extraGroups = [ "keys" ];
       };
+
+      # Prometheus reads bearer_token_file for the Gitea scrape job at runtime.
+      prometheus = {
+        extraGroups = [ "keys" ];
+      };
     };
 
     groups.nextcloud-exporter = { };