Files
nix-config/modules/nixos/services/grafana/default.nix
2026-03-24 13:02:17 -05:00

461 lines
17 KiB
Nix
Executable File

{
config,
lib,
pkgs,
namespace,
...
}:
with lib;
let
# Attribute name of this service under config.<namespace>.services.
name = "grafana";
# This module's own option values (cfg.port / cfg.configDir are read below;
# presumably declared by lib.<namespace>.mkModule — see grafanaConfig).
cfg = config.${namespace}.services.${name};
# ---------------------------------------------------------------------------
# Community dashboards — fetched at build time, pinned by hash.
#
# Community dashboards use __inputs with a template variable (e.g.
# ${DS_PROM} or ${DS_PROMETHEUS}) for the datasource UID. When provisioned
# via file Grafana never substitutes those, so every panel is datasource-
# broken. We patch each file at build time: replace all occurrences of the
# template variable with our fixed datasource UID "prometheus", and strip
# __inputs/__requires so Grafana doesn't treat the file as an import.
# ---------------------------------------------------------------------------
# Patch a community Grafana dashboard JSON at eval time using pure Nix:
# 1. Parse the JSON with builtins.fromJSON
# 2. Strip __inputs and __requires (import-only metadata)
# 3. Replace the datasource UID template variable with our fixed UID
# using builtins.replaceStrings on the re-serialised JSON string —
# this avoids any ${} interpolation issues in Nix strings entirely.
# 4. Write the result to the store with pkgs.writeText
# Patch a community Grafana dashboard JSON at eval time, in pure Nix.
# Positional arguments: output file name, fetched JSON source, and the
# datasource template variable (e.g. "DS_PROMETHEUS"). Strips the
# import-only __inputs/__requires metadata and pins every occurrence of
# the "${VAR}" placeholder to the fixed datasource UID "prometheus".
patchDashboard =
  fileName: jsonSrc: dsVar:
  let
    # Parse the dashboard and drop the import metadata in one step.
    cleaned = builtins.removeAttrs (builtins.fromJSON (builtins.readFile jsonSrc)) [
      "__inputs"
      "__requires"
    ];
    # Build the literal "${VAR}" token by concatenation so no ${} ever
    # appears in Nix source (avoids accidental interpolation).
    needle = "\${" + dsVar + "}";
    patched = builtins.replaceStrings [ needle ] [ "prometheus" ] (builtins.toJSON cleaned);
  in
  pkgs.writeText fileName patched;
# One store directory containing every pinned community dashboard,
# consumed by the "community" file provider under provision below.
communityDashboards = pkgs.linkFarm "grafana-community-dashboards" [
{
# Node Exporter Full — https://grafana.com/grafana/dashboards/1860
# Uses ${ds_prometheus} (lowercase)
name = "node-exporter-full.json";
path = patchDashboard "node-exporter-full.json" (pkgs.fetchurl {
url = "https://grafana.com/api/dashboards/1860/revisions/latest/download";
sha256 = "sha256-pNgn6xgZBEu6LW0lc0cXX2gRkQ8lg/rer34SPE3yEl4=";
}) "ds_prometheus";
}
{
# PostgreSQL Database — https://grafana.com/grafana/dashboards/9628
name = "postgresql.json";
path = patchDashboard "postgresql.json" (pkgs.fetchurl {
url = "https://grafana.com/api/dashboards/9628/revisions/latest/download";
sha256 = "sha256-UhusNAZbyt7fJV/DhFUK4FKOmnTpG0R15YO2r+nDnMc=";
}) "DS_PROMETHEUS";
}
{
# Redis Dashboard for prometheus-redis-exporter 1.x — https://grafana.com/grafana/dashboards/763
# Uses DS_PROM; also patches out the 'namespace' template variable
# since our metrics have no namespace label — all done in pure Nix.
# This one needs templating surgery beyond what patchDashboard does,
# so the whole pipeline is inlined here.
name = "redis.json";
path =
let
src = pkgs.fetchurl {
url = "https://grafana.com/api/dashboards/763/revisions/latest/download";
sha256 = "sha256-pThz+zHjcTT9vf8fpUuZK/ejNnH9GwEZVXOY27c9Aw8=";
};
raw = builtins.readFile src;
# Strip import-only metadata, same as patchDashboard.
d = builtins.removeAttrs (builtins.fromJSON raw) [
"__inputs"
"__requires"
];
# Drop the 'namespace' variable and fix 'instance' to query directly.
fixedTemplating = d // {
templating = d.templating // {
list = map (
v:
if v.name == "instance" then
v
// {
query = "label_values(redis_up, instance)";
definition = "label_values(redis_up, instance)";
}
else
v
) (builtins.filter (v: v.name != "namespace") d.templating.list);
};
};
# Same DS-variable substitution patchDashboard performs, done inline
# on the re-serialised, templating-fixed dashboard.
patched = builtins.replaceStrings [ ("\${" + "DS_PROM" + "}") ] [ "prometheus" ] (
builtins.toJSON fixedTemplating
);
in
pkgs.writeText "redis.json" patched;
}
{
# MySQL Overview — https://grafana.com/grafana/dashboards/7362
name = "mysql.json";
path = patchDashboard "mysql.json" (pkgs.fetchurl {
url = "https://grafana.com/api/dashboards/7362/revisions/latest/download";
sha256 = "sha256-WW7g60KY20XAdyUpumA0hBrjFC9MQGuGjiJKUhSVBXI=";
}) "DS_PROMETHEUS";
}
{
# Nextcloud — https://grafana.com/grafana/dashboards/9632
name = "nextcloud.json";
path = patchDashboard "nextcloud.json" (pkgs.fetchurl {
url = "https://grafana.com/api/dashboards/9632/revisions/latest/download";
sha256 = "sha256-Z28Q/sMg3jxglkszAs83IpL8f4p9loNnTQzjc3S/SAQ=";
}) "DS_PROMETHEUS";
}
];
# ---------------------------------------------------------------------------
# Custom dashboards — maintained in this repo under dashboards/
# ---------------------------------------------------------------------------
# In-repo dashboards: each base name below maps to
# dashboards/<name>.json, symlinked into one provisioning directory.
customDashboards = pkgs.linkFarm "grafana-custom-dashboards" (
  map
    (dash: {
      name = "${dash}.json";
      path = ./dashboards + "/${dash}.json";
    })
    [
      "nut"
      "caddy"
      "gitea"
      "nas-overview"
    ]
);
# Minimal .my.cnf for the mysqld exporter. No credentials are needed
# because runAsLocalSuperUser = true runs as the mysql OS user, which
# MariaDB authenticates via the unix_socket plugin automatically.
mysqldExporterCnf = pkgs.writeText "prometheus-mysqld-exporter.cnf" ''
[client]
user=root
socket=/run/mysqld/mysqld.sock
'';
# Ports of sibling services in this namespace, used as scrape targets in
# scrapeConfigs below.
giteaPort = config.${namespace}.services.gitea.port;
resticPort = config.${namespace}.services.restic.port;
nextcloudPort = config.${namespace}.services.nextcloud.port;
# The full Prometheus + Grafana stack, wrapped in the repo's mkModule
# helper so it can be toggled under config.<namespace>.services.grafana.
# NOTE(review): cfg.port and cfg.configDir are read below even though
# options = { } here — presumably mkModule declares standard
# enable/port/configDir options; confirm in lib.<namespace>.
grafanaConfig = lib.${namespace}.mkModule {
inherit config name;
description = "grafana";
options = { };
moduleConfig = {
services = {
prometheus = {
enable = true;
# bearer_token_file paths (e.g. Gitea metrics key) are SOPS secrets
# that only exist at runtime, not in the Nix build sandbox.
# "syntax-only" still catches config errors without stat-ing the files.
checkConfig = "syntax-only";
exporters = {
node = {
enable = true;
enabledCollectors = [
"filesystem"
"diskstats"
"meminfo"
"cpu"
"systemd"
"processes"
];
extraFlags = [
# Exclude pseudo/ephemeral mounts so filesystem metrics stay useful.
"--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run)($|/)"
];
};
# Currently disabled; kept here so it can be flipped on easily.
libvirt = {
enable = false;
openFirewall = true;
};
nut = {
enable = true;
openFirewall = true;
passwordPath = config.sops.secrets."jallen-nas/ups_password".path;
# upsUser is bound at the bottom of the outer let block.
nutUser = upsUser;
};
# PostgreSQL — runs as the local postgres superuser via peer auth
# (Unix socket, no password required).
postgres = {
enable = true;
runAsLocalSuperUser = true;
};
# Redis — single exporter instance covering all four Redis servers
# via the multi-target scrape pattern (/scrape?target=<addr>).
# The exporter needs AF_INET to reach TCP Redis instances.
redis = {
enable = true;
# No fixed --redis.addr: multi-target mode uses ?target= param.
};
# MariaDB — runs as the mysql OS user so it can connect via the
# Unix socket without a password (unix_socket auth).
mysqld = {
enable = true;
runAsLocalSuperUser = true;
configFile = mysqldExporterCnf;
};
# Nextcloud — authenticates with the admin account.
# passwordFile must be readable by the prometheus-nextcloud-exporter
# user; sops mode 0440 + group keys covers that.
nextcloud = {
enable = true;
url = "http://localhost:${toString nextcloudPort}";
username = "mjallen";
passwordFile = config.sops.secrets."jallen-nas/nextcloud/adminpassword".path;
};
};
scrapeConfigs = [
# ── System ──────────────────────────────────────────────────────────
{
job_name = "node";
static_configs = [
{
targets = [ "localhost:${toString config.services.prometheus.exporters.node.port}" ];
}
];
}
# ── UPS (NUT) ────────────────────────────────────────────────────────
{
job_name = "nut";
# DRuggeri's nut_exporter serves UPS metrics at /ups_metrics, not /metrics.
metrics_path = "/ups_metrics";
static_configs = [
{
targets = [ "localhost:${toString config.services.prometheus.exporters.nut.port}" ];
}
];
}
# ── Databases ────────────────────────────────────────────────────────
{
job_name = "postgres";
static_configs = [
{
targets = [ "localhost:${toString config.services.prometheus.exporters.postgres.port}" ];
}
];
}
{
# Redis multi-target: one exporter, four Redis instances.
# The redis_exporter's /scrape?target= endpoint proxies each target
# so a single exporter process covers all servers.
job_name = "redis";
metrics_path = "/scrape";
static_configs = [
{
targets = [
"redis://localhost:6379" # authentik
"redis://localhost:6363" # ccache
"redis://localhost:6380" # manyfold
"redis://localhost:6381" # onlyoffice
];
}
];
# Standard multi-target relabeling: the listed address becomes the
# ?target= query param and the instance label, then the scrape
# address is rewritten to the single local exporter.
relabel_configs = [
{
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
target_label = "__address__";
replacement = "localhost:${toString config.services.prometheus.exporters.redis.port}";
}
];
}
{
job_name = "mysqld";
static_configs = [
{
targets = [ "localhost:${toString config.services.prometheus.exporters.mysqld.port}" ];
}
];
}
# ── Application services ─────────────────────────────────────────────
{
# Caddy exposes its built-in Prometheus endpoint on port 2019.
job_name = "caddy";
static_configs = [
{
targets = [ "localhost:2019" ];
}
];
}
{
# Gitea's /metrics endpoint is protected by a Bearer token.
job_name = "gitea";
metrics_path = "/metrics";
bearer_token_file = config.sops.secrets."jallen-nas/gitea/metrics-key".path;
static_configs = [
{
targets = [ "localhost:${toString giteaPort}" ];
}
];
}
{
# restic REST server exposes Prometheus metrics at /metrics.
job_name = "restic";
metrics_path = "/metrics";
static_configs = [
{
targets = [ "localhost:${toString resticPort}" ];
}
];
}
{
job_name = "nextcloud";
static_configs = [
{
targets = [ "localhost:${toString config.services.prometheus.exporters.nextcloud.port}" ];
}
];
}
];
};
grafana = {
enable = true;
settings = {
server = {
http_port = cfg.port;
# Listen on all interfaces; reverse proxy / firewall handles exposure.
http_addr = "0.0.0.0";
};
security = {
# Read the secret key from a SOPS-managed file at runtime so it
# never appears in the Nix store. The "$__file{}" syntax is
# Grafana's built-in file provider.
secret_key = "$__file{${config.sops.secrets."jallen-nas/grafana/secret-key".path}}";
};
# Grafana 12 enables kubernetesDashboards by default, which uses a
# new storage backend that validates datasource refs in dashboard
# files concurrently with datasource provisioning, causing a race
# that always fails on a clean install. Disable it to use the
# classic file provisioner that tolerates missing datasource refs.
"feature_toggles" = {
kubernetesDashboards = false;
};
# Grafana 12 introduced permitted_provisioning_paths as a security
# allowlist. The NixOS module stores all provisioning files in the
# Nix store, which is not in the default allowlist, causing the
# provisioner to silently refuse to load any files and then error
# with "data source not found".
paths.permitted_provisioning_paths = "/nix/store";
};
dataDir = "${cfg.configDir}/grafana";
provision = {
enable = true;
# Use path instead of settings to avoid the NixOS serializer
# writing `secureJsonData: null` which Grafana 12 chokes on.
# The uid "prometheus" here is the fixed UID the patched
# dashboards reference.
datasources.path = pkgs.writeTextDir "datasource.yaml" ''
apiVersion: 1
datasources:
- name: Prometheus
uid: prometheus
type: prometheus
access: proxy
orgId: 1
url: http://localhost:${toString config.services.prometheus.port}
editable: false
jsonData:
httpMethod: POST
timeInterval: 15s
'';
# Provide empty-but-valid alerting provisioning documents.
# Without these, the NixOS module serialises `null` YAML which
# Grafana 12's provisioner fails to parse, producing a spurious
# "data source not found" error at startup.
alerting = {
rules.settings = {
apiVersion = 1;
groups = [ ];
};
contactPoints.settings = {
apiVersion = 1;
contactPoints = [ ];
};
policies.settings = {
apiVersion = 1;
policies = [ ];
};
templates.settings = {
apiVersion = 1;
templates = [ ];
};
muteTimings.settings = {
apiVersion = 1;
muteTimes = [ ];
};
};
# Two read-only file providers: pinned community dashboards and the
# in-repo custom ones, each from its own linkFarm store path.
dashboards.settings.providers = [
{
name = "community";
orgId = 1;
type = "file";
disableDeletion = true;
updateIntervalSeconds = 60;
allowUiUpdates = false;
options.path = communityDashboards;
}
{
name = "custom";
orgId = 1;
type = "file";
disableDeletion = true;
updateIntervalSeconds = 60;
allowUiUpdates = false;
options.path = customDashboards;
}
];
};
};
};
# The redis exporter needs AF_INET to reach TCP Redis instances.
# The default systemd hardening only allows AF_UNIX.
systemd.services.prometheus-redis-exporter.serviceConfig.RestrictAddressFamilies = [
"AF_UNIX"
"AF_INET"
"AF_INET6"
];
};
};
# NUT user the nut exporter authenticates as (see exporters.nut above).
upsUser = "nas-admin";
in
{
imports = [ grafanaConfig ];
}