{ config, lib, pkgs, namespace, ... }:
with lib;
let
  name = "grafana";
  cfg = config.${namespace}.services.${name};

  # ---------------------------------------------------------------------------
  # Community dashboards — fetched at build time, pinned by hash.
  #
  # Community dashboards use __inputs with a template variable (e.g.
  # ${DS_PROM} or ${DS_PROMETHEUS}) for the datasource UID. When provisioned
  # via file Grafana never substitutes those, so every panel is datasource-
  # broken. We patch each file at build time: replace all occurrences of the
  # template variable with our fixed datasource UID "prometheus", and strip
  # __inputs/__requires so Grafana doesn't treat the file as an import.
  # ---------------------------------------------------------------------------

  # Patch a community Grafana dashboard JSON at eval time using pure Nix:
  #   1. Parse the JSON with builtins.fromJSON
  #   2. Strip __inputs and __requires (import-only metadata)
  #   3. Apply the optional `transform` to the parsed dashboard attrset
  #      (identity by default) for per-dashboard structural fixes
  #   4. Replace the datasource UID template variable with our fixed UID
  #      using builtins.replaceStrings on the re-serialised JSON string —
  #      this avoids any ${} interpolation issues in Nix strings entirely.
  #   5. Write the result to the store with pkgs.writeText
  #
  # Arguments (attrset):
  #   name      — store file name of the patched dashboard
  #   src       — raw dashboard JSON (e.g. a pkgs.fetchurl result)
  #   dsVar     — name of the __inputs template variable, without the ${}
  #   transform — optional (dashboard attrset -> dashboard attrset) function
  patchDashboard =
    { name, src, dsVar, transform ? (d: d) }:
    let
      raw = builtins.readFile src;
      d = builtins.fromJSON raw;
      # Strip import metadata, apply the caller's transform, re-serialise.
      stripped = builtins.toJSON (
        transform (builtins.removeAttrs d [ "__inputs" "__requires" ])
      );
      # Replace the template variable (e.g. "${DS_PROMETHEUS}") with our UID.
      # builtins.replaceStrings takes lists so we never write ${} in Nix source.
      patched = builtins.replaceStrings [ ("\${" + dsVar + "}") ] [ "prometheus" ] stripped;
    in
    pkgs.writeText name patched;

  communityDashboards = pkgs.linkFarm "grafana-community-dashboards" [
    {
      # Node Exporter Full — https://grafana.com/grafana/dashboards/1860
      # Uses ${ds_prometheus} (lowercase)
      name = "node-exporter-full.json";
      path = patchDashboard {
        name = "node-exporter-full.json";
        src = pkgs.fetchurl {
          url = "https://grafana.com/api/dashboards/1860/revisions/latest/download";
          sha256 = "sha256-pNgn6xgZBEu6LW0lc0cXX2gRkQ8lg/rer34SPE3yEl4=";
        };
        dsVar = "ds_prometheus";
      };
    }
    {
      # PostgreSQL Database — https://grafana.com/grafana/dashboards/9628
      name = "postgresql.json";
      path = patchDashboard {
        name = "postgresql.json";
        src = pkgs.fetchurl {
          url = "https://grafana.com/api/dashboards/9628/revisions/latest/download";
          sha256 = "sha256-UhusNAZbyt7fJV/DhFUK4FKOmnTpG0R15YO2r+nDnMc=";
        };
        dsVar = "DS_PROMETHEUS";
      };
    }
    {
      # Redis Dashboard for prometheus-redis-exporter 1.x — https://grafana.com/grafana/dashboards/763
      # Uses DS_PROM; also patches out the 'namespace' template variable
      # since our metrics have no namespace label — all done in pure Nix.
      name = "redis.json";
      path = patchDashboard {
        name = "redis.json";
        src = pkgs.fetchurl {
          url = "https://grafana.com/api/dashboards/763/revisions/latest/download";
          sha256 = "sha256-pThz+zHjcTT9vf8fpUuZK/ejNnH9GwEZVXOY27c9Aw8=";
        };
        dsVar = "DS_PROM";
        # Drop the 'namespace' variable and fix 'instance' to query directly.
        transform = d: d // {
          templating = d.templating // {
            list = map (
              v:
              if v.name == "instance" then
                v // {
                  query = "label_values(redis_up, instance)";
                  definition = "label_values(redis_up, instance)";
                }
              else
                v
            ) (builtins.filter (v: v.name != "namespace") d.templating.list);
          };
        };
      };
    }
    {
      # MySQL Overview — https://grafana.com/grafana/dashboards/7362
      name = "mysql.json";
      path = patchDashboard {
        name = "mysql.json";
        src = pkgs.fetchurl {
          url = "https://grafana.com/api/dashboards/7362/revisions/latest/download";
          sha256 = "sha256-WW7g60KY20XAdyUpumA0hBrjFC9MQGuGjiJKUhSVBXI=";
        };
        dsVar = "DS_PROMETHEUS";
      };
    }
    {
      # Nextcloud — https://grafana.com/grafana/dashboards/9632
      name = "nextcloud.json";
      path = patchDashboard {
        name = "nextcloud.json";
        src = pkgs.fetchurl {
          url = "https://grafana.com/api/dashboards/9632/revisions/latest/download";
          sha256 = "sha256-Z28Q/sMg3jxglkszAs83IpL8f4p9loNnTQzjc3S/SAQ=";
        };
        dsVar = "DS_PROMETHEUS";
      };
    }
  ];

  # ---------------------------------------------------------------------------
  # Custom dashboards — maintained in this repo under dashboards/
  # ---------------------------------------------------------------------------
  customDashboards = pkgs.linkFarm "grafana-custom-dashboards" [
    { name = "nut.json"; path = ./dashboards/nut.json; }
    { name = "caddy.json"; path = ./dashboards/caddy.json; }
    { name = "gitea.json"; path = ./dashboards/gitea.json; }
    { name = "nas-overview.json"; path = ./dashboards/nas-overview.json; }
  ];

  # Minimal .my.cnf for the mysqld exporter. No credentials are needed
  # because runAsLocalSuperUser = true runs as the mysql OS user, which
  # MariaDB authenticates via the unix_socket plugin automatically.
  mysqldExporterCnf = pkgs.writeText "prometheus-mysqld-exporter.cnf" ''
    [client]
    user=root
    socket=/run/mysqld/mysqld.sock
  '';

  # Ports of sibling services in this namespace, referenced by scrape
  # targets / exporter URLs below.
  giteaPort = config.${namespace}.services.gitea.port;
  resticPort = config.${namespace}.services.restic.port;
  nextcloudPort = config.${namespace}.services.nextcloud.port;

  grafanaConfig = lib.${namespace}.mkModule {
    inherit config name;
    description = "grafana";
    options = { };
    moduleConfig = {
      services = {
        prometheus = {
          enable = true;
          # bearer_token_file paths (e.g. Gitea metrics key) are SOPS secrets
          # that only exist at runtime, not in the Nix build sandbox.
          # "syntax-only" still catches config errors without stat-ing the files.
          checkConfig = "syntax-only";
          exporters = {
            node = {
              enable = true;
              enabledCollectors = [ "filesystem" "diskstats" "meminfo" "cpu" "systemd" "processes" ];
              extraFlags = [ "--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run)($|/)" ];
            };
            # NOTE(review): disabled but still opening the firewall port —
            # harmless while enable = false; confirm whether this is intentional.
            libvirt = {
              enable = false;
              openFirewall = true;
            };
            nut = {
              enable = true;
              openFirewall = true;
              passwordPath = config.sops.secrets."jallen-nas/ups_password".path;
              nutUser = upsUser;
            };
            # PostgreSQL — runs as the local postgres superuser via peer auth
            # (Unix socket, no password required).
            postgres = {
              enable = true;
              runAsLocalSuperUser = true;
            };
            # Redis — single exporter instance covering all four Redis servers
            # via the multi-target scrape pattern (/scrape?target=).
            # The exporter needs AF_INET to reach TCP Redis instances
            # (see the RestrictAddressFamilies override below).
            redis = {
              enable = true;
              # No fixed --redis.addr: multi-target mode uses ?target= param.
            };
            # MariaDB — runs as the mysql OS user so it can connect via the
            # Unix socket without a password (unix_socket auth).
            mysqld = {
              enable = true;
              runAsLocalSuperUser = true;
              configFile = mysqldExporterCnf;
            };
            # Nextcloud — authenticates with the admin account.
            # passwordFile must be readable by the prometheus-nextcloud-exporter
            # user; sops mode 0440 + group keys covers that.
            nextcloud = {
              enable = true;
              url = "http://localhost:${toString nextcloudPort}";
              username = "mjallen";
              passwordFile = config.sops.secrets."jallen-nas/nextcloud/adminpassword".path;
            };
          };
          scrapeConfigs = [
            # ── System ──────────────────────────────────────────────────────────
            {
              job_name = "node";
              static_configs = [
                { targets = [ "localhost:${toString config.services.prometheus.exporters.node.port}" ]; }
              ];
            }
            # ── UPS (NUT) ────────────────────────────────────────────────────────
            {
              job_name = "nut";
              # DRuggeri's nut_exporter serves UPS metrics at /ups_metrics, not /metrics.
              metrics_path = "/ups_metrics";
              static_configs = [
                { targets = [ "localhost:${toString config.services.prometheus.exporters.nut.port}" ]; }
              ];
            }
            # ── Databases ────────────────────────────────────────────────────────
            {
              job_name = "postgres";
              static_configs = [
                { targets = [ "localhost:${toString config.services.prometheus.exporters.postgres.port}" ]; }
              ];
            }
            {
              # Redis multi-target: one exporter, four Redis instances.
              # The redis_exporter's /scrape?target= endpoint proxies each target
              # so a single exporter process covers all servers.
              job_name = "redis";
              metrics_path = "/scrape";
              static_configs = [
                {
                  targets = [
                    "redis://localhost:6379" # authentik
                    "redis://localhost:6363" # ccache
                    "redis://localhost:6380" # manyfold
                    "redis://localhost:6381" # onlyoffice
                  ];
                }
              ];
              # Standard multi-target relabelling: target address becomes the
              # ?target= parameter and the 'instance' label, while the real
              # scrape address is rewritten to the local exporter.
              relabel_configs = [
                {
                  source_labels = [ "__address__" ];
                  target_label = "__param_target";
                }
                {
                  source_labels = [ "__param_target" ];
                  target_label = "instance";
                }
                {
                  target_label = "__address__";
                  replacement = "localhost:${toString config.services.prometheus.exporters.redis.port}";
                }
              ];
            }
            {
              job_name = "mysqld";
              static_configs = [
                { targets = [ "localhost:${toString config.services.prometheus.exporters.mysqld.port}" ]; }
              ];
            }
            # ── Application services ─────────────────────────────────────────────
            {
              # Caddy exposes its built-in Prometheus endpoint on port 2019.
              job_name = "caddy";
              static_configs = [
                { targets = [ "localhost:2019" ]; }
              ];
            }
            {
              # Gitea's /metrics endpoint is protected by a Bearer token.
              job_name = "gitea";
              metrics_path = "/metrics";
              bearer_token_file = config.sops.secrets."jallen-nas/gitea/metrics-key".path;
              static_configs = [
                { targets = [ "localhost:${toString giteaPort}" ]; }
              ];
            }
            {
              # restic REST server exposes Prometheus metrics at /metrics.
              job_name = "restic";
              metrics_path = "/metrics";
              static_configs = [
                { targets = [ "localhost:${toString resticPort}" ]; }
              ];
            }
            {
              job_name = "nextcloud";
              static_configs = [
                { targets = [ "localhost:${toString config.services.prometheus.exporters.nextcloud.port}" ]; }
              ];
            }
          ];
        };
        grafana = {
          enable = true;
          settings = {
            server = {
              http_port = cfg.port;
              http_addr = "0.0.0.0";
            };
            security = {
              # Read the secret key from a SOPS-managed file at runtime so it
              # never appears in the Nix store. The "$__file{}" syntax is
              # Grafana's built-in file provider.
              secret_key = "$__file{${config.sops.secrets."jallen-nas/grafana/secret-key".path}}";
            };
            # Grafana 12 enables kubernetesDashboards by default, which uses a
            # new storage backend that validates datasource refs in dashboard
            # files concurrently with datasource provisioning, causing a race
            # that always fails on a clean install. Disable it to use the
            # classic file provisioner that tolerates missing datasource refs.
            "feature_toggles" = {
              kubernetesDashboards = false;
            };
            # Grafana 12 introduced permitted_provisioning_paths as a security
            # allowlist. The NixOS module stores all provisioning files in the
            # Nix store, which is not in the default allowlist, causing the
            # provisioner to silently refuse to load any files and then error
            # with "data source not found".
            paths.permitted_provisioning_paths = "/nix/store";
          };
          dataDir = "${cfg.configDir}/grafana";
          provision = {
            enable = true;
            # Use path instead of settings to avoid the NixOS serializer
            # writing `secureJsonData: null` which Grafana 12 chokes on.
            datasources.path = pkgs.writeTextDir "datasource.yaml" ''
              apiVersion: 1
              datasources:
                - name: Prometheus
                  uid: prometheus
                  type: prometheus
                  access: proxy
                  orgId: 1
                  url: http://localhost:${toString config.services.prometheus.port}
                  editable: false
                  jsonData:
                    httpMethod: POST
                    timeInterval: 15s
            '';
            # Provide empty-but-valid alerting provisioning documents.
            # Without these, the NixOS module serialises `null` YAML which
            # Grafana 12's provisioner fails to parse, producing a spurious
            # "data source not found" error at startup.
            alerting = {
              rules.settings = { apiVersion = 1; groups = [ ]; };
              contactPoints.settings = { apiVersion = 1; contactPoints = [ ]; };
              policies.settings = { apiVersion = 1; policies = [ ]; };
              templates.settings = { apiVersion = 1; templates = [ ]; };
              muteTimings.settings = { apiVersion = 1; muteTimes = [ ]; };
            };
            dashboards.settings.providers = [
              {
                name = "community";
                orgId = 1;
                type = "file";
                disableDeletion = true;
                updateIntervalSeconds = 60;
                allowUiUpdates = false;
                options.path = communityDashboards;
              }
              {
                name = "custom";
                orgId = 1;
                type = "file";
                disableDeletion = true;
                updateIntervalSeconds = 60;
                allowUiUpdates = false;
                options.path = customDashboards;
              }
            ];
          };
        };
      };
      # The redis exporter needs AF_INET to reach TCP Redis instances.
      # The default systemd hardening only allows AF_UNIX.
      systemd.services.prometheus-redis-exporter.serviceConfig.RestrictAddressFamilies = [ "AF_UNIX" "AF_INET" "AF_INET6" ];
    };
  };

  # NUT user the nut exporter authenticates as (see exporters.nut above).
  upsUser = "nas-admin";
in
{
  imports = [ grafanaConfig ];
}