commit 3234029ae5 (parent 928de1837b)
mjallen18 · 2026-04-07 20:36:32 -05:00
10 changed files with 1039 additions and 2 deletions

View File

@@ -156,9 +156,53 @@ let
bouncerName = "nas-bouncer";
};
# secrets.apiKeyPath = config.sops.secrets."jallen-nas/crowdsec-firewall-bouncer-api-key".path;
settings = {
# The default api_url is derived from the LAPI's listen_uri, which is
# "0.0.0.0:8181" — a valid bind address but not a connectable URL.
# Override to the loopback address the bouncer should actually connect to.
api_url = "http://127.0.0.1:${toString cfg.port}";
};
};
};
# During activation (which runs as root), check whether the machine credential in
# client.yaml exists in the crowdsec SQLite DB. If not (e.g. after a DB wipe),
# clear client.yaml so the subsequent crowdsec-setup ExecStartPre re-registers.
# This runs before switch-to-configuration starts/restarts services, breaking the
# boot-time cycle where the ExecStartPre fix can't apply until the service succeeds.
system.activationScripts.crowdsec-check-machine-creds =
let
machineName = config.services.crowdsec.name;
in
{
text = ''
clientYaml="${cfg.configDir}/crowdsec/client.yaml"
dbPath="/var/lib/crowdsec/state/crowdsec.db"
if [ -f "$dbPath" ]; then
if [ -s "$clientYaml" ]; then
login=$(${pkgs.gnugrep}/bin/grep -oP '(?<=login: ).*' "$clientYaml" || true)
if [ -n "$login" ]; then
found=$(${pkgs.sqlite}/bin/sqlite3 "$dbPath" \
"SELECT COUNT(*) FROM machines WHERE machine_id='$login';" 2>/dev/null || echo "0")
if [ "$found" = "0" ]; then
echo "crowdsec activation: machine '$login' missing from DB resetting credentials"
${pkgs.coreutils}/bin/rm -f "$clientYaml"
# Also remove any stale entry under the configured machine name so
# 'cscli machines add ${machineName} --auto' doesn't fail with "user already exist"
${pkgs.sqlite}/bin/sqlite3 "$dbPath" \
"DELETE FROM machines WHERE machine_id='${machineName}';" 2>/dev/null || true
fi
fi
else
# client.yaml absent/empty; ensure no stale name entry blocks re-registration
${pkgs.sqlite}/bin/sqlite3 "$dbPath" \
"DELETE FROM machines WHERE machine_id='${machineName}';" 2>/dev/null || true
fi
fi
'';
deps = [ ];
};
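# To inspect the registration state by hand, the same table the script
# above queries can be listed directly (paths as used in this module):
#   sqlite3 /var/lib/crowdsec/state/crowdsec.db "SELECT machine_id FROM machines;"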
# The upstream crowdsec module uses ReadWritePaths (not StateDirectory) on
# crowdsec.service, meaning it expects /var/lib/crowdsec to exist as a real
# directory (created by tmpfiles). However, crowdsec-firewall-bouncer-register
@@ -181,7 +225,64 @@ let
services = {
crowdsec = {
serviceConfig = lib.mkMerge [
{ DynamicUser = lib.mkForce false; }
{
DynamicUser = lib.mkForce false;
# ProtectSystem=strict (set upstream) makes all paths read-only except
# those in ReadWritePaths. The credentials file lives on the NAS mount
# which is not listed by default, so cscli machines add fails with EROFS.
ReadWritePaths = [ "${cfg.configDir}/crowdsec" ];
# If the machine credentials in client.yaml don't match any machine in the
# SQLite DB (e.g. after a DB wipe while client.yaml persists), crowdsec
# fatals on startup. Detect this mismatch before crowdsec-setup runs:
# read the machine login from client.yaml, query the DB directly, and
# delete client.yaml if the machine is absent so the next crowdsec-setup
# invocation re-registers a fresh machine.
# Use mkBefore so this runs before the upstream crowdsec-setup ExecStartPre
# entries, giving crowdsec-setup a cleared client.yaml to re-register from.
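# (The "+" prefix below makes systemd run this ExecStartPre with full
# privileges, i.e. as root, ignoring the unit's User=/Group= settings.)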
ExecStartPre = lib.mkBefore [
(
"+"
+ (
let
machineName = config.services.crowdsec.name;
in
pkgs.writeShellScript "crowdsec-check-machine-creds" ''
set -euo pipefail
clientYaml="${cfg.configDir}/crowdsec/client.yaml"
dbPath="/var/lib/crowdsec/state/crowdsec.db"
sqlite="${pkgs.sqlite}/bin/sqlite3"
rm="${pkgs.coreutils}/bin/rm"
[ -f "$dbPath" ] || exit 0 # No DB yet; fresh install, nothing to fix
if [ -s "$clientYaml" ]; then
# Credentials file exists; verify the login it contains is in the DB
login=$(${pkgs.gnugrep}/bin/grep -oP '(?<=login: ).*' "$clientYaml" || true)
if [ -n "$login" ]; then
found=$("$sqlite" "$dbPath" \
"SELECT COUNT(*) FROM machines WHERE machine_id='$login';" 2>/dev/null || echo "0")
if [ "$found" = "0" ]; then
echo "crowdsec: machine '$login' missing from DB resetting credentials"
"$rm" -f "$clientYaml"
"$sqlite" "$dbPath" \
"DELETE FROM machines WHERE machine_id='${machineName}';" 2>/dev/null || true
fi
fi
else
# Credentials file absent; ensure no stale name row blocks 'machines add'
stale=$("$sqlite" "$dbPath" \
"SELECT COUNT(*) FROM machines WHERE machine_id='${machineName}';" 2>/dev/null || echo "0")
if [ "$stale" != "0" ]; then
echo "crowdsec: client.yaml absent but '${machineName}' in DB removing stale row"
"$sqlite" "$dbPath" \
"DELETE FROM machines WHERE machine_id='${machineName}';" 2>/dev/null || true
fi
fi
''
)
)
];
}
(lib.mkIf (cfg.ntfy.enable && cfg.ntfy.envFile != "") {
EnvironmentFile = [ cfg.ntfy.envFile ];
})

View File

@@ -890,7 +890,24 @@ let
restartUnits = [ "grafana.service" ];
};
systemd.services.grafana.serviceConfig.EnvironmentFile = config.sops.templates."grafana.env".path;
systemd.services.grafana.serviceConfig = {
EnvironmentFile = config.sops.templates."grafana.env".path;
# Grafana downloads plugins at runtime and occasionally creates subdirectories
# with overly restrictive permissions (e.g. 0700 for locales/*), which causes
# the next startup to fail with "permission denied" during plugin discovery.
# Fix any such directories before Grafana starts.
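# ('a+rX' adds read for everyone but execute only on directories and on
# files that already carry an execute bit, so plugin files stay non-executable.)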
ExecStartPre = [
(
"+"
+ pkgs.writeShellScript "grafana-fix-plugin-perms" ''
pluginDir="${cfg.configDir}/grafana/plugins"
if [ -d "$pluginDir" ]; then
${pkgs.coreutils}/bin/chmod -R a+rX "$pluginDir"
fi
''
)
];
};
# The redis exporter needs AF_INET to reach TCP Redis instances.
# The default systemd hardening only allows AF_UNIX.
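# (The override itself lies outside this hunk; a sketch of its likely shape,
# where the unit name prometheus-redis-exporter is an assumption:
#   systemd.services.prometheus-redis-exporter.serviceConfig.RestrictAddressFamilies =
#     [ "AF_UNIX" "AF_INET" ];
# )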

View File

@@ -0,0 +1,135 @@
{
config,
lib,
namespace,
pkgs,
...
}:
with lib;
let
name = "nebula-ui";
cfg = config.${namespace}.services.${name};
statsListenAddr = "${cfg.statsListenAddress}:${toString cfg.statsPort}";
nebulaUiConfig = lib.${namespace}.mkModule {
inherit config name;
description = "Nebula network web UI (stats + cert signing)";
options = {
# Override mkModule defaults: bind to localhost only; firewall closed by
# default since this service sits behind a Caddy reverse proxy.
listenAddress = lib.${namespace}.mkOpt types.str "127.0.0.1" "Address nebula-ui listens on";
openFirewall =
lib.${namespace}.mkBoolOpt false
"Open firewall for nebula-ui (not needed behind a reverse proxy)";
# ── Stats endpoint ───────────────────────────────────────────────────────
statsListenAddress =
lib.${namespace}.mkOpt types.str "127.0.0.1"
"Address nebula's stats HTTP endpoint listens on";
statsPort = lib.${namespace}.mkOpt types.port 8474 "Port nebula's stats HTTP endpoint listens on";
# ── CA secrets ───────────────────────────────────────────────────────────
# The CA cert/key are already decrypted by the nebula sops.nix.
# We need a *separate* sops secret for the CA key exposed to nebula-ui
# because the nebula module only exposes it to nebula-<network>.
caCertSecretKey =
lib.${namespace}.mkOpt types.str ""
"SOPS secret key for the CA certificate (e.g. \"pi5/nebula/ca-cert\")";
caKeySecretKey =
lib.${namespace}.mkOpt types.str ""
"SOPS secret key for the CA private key (e.g. \"pi5/nebula/ca-key\")";
secretsFile =
lib.${namespace}.mkOpt types.str ""
"Path to the SOPS secrets YAML that holds the CA cert + key";
# ── Network identity ─────────────────────────────────────────────────────
networkName =
lib.${namespace}.mkOpt types.str "jallen-nebula"
"Nebula network name (must match services.nebula.networkName)";
};
moduleConfig = {
assertions = [
{
assertion = cfg.caCertSecretKey != "";
message = "mjallen.services.nebula-ui.caCertSecretKey must be set";
}
{
assertion = cfg.caKeySecretKey != "";
message = "mjallen.services.nebula-ui.caKeySecretKey must be set";
}
{
assertion = cfg.secretsFile != "";
message = "mjallen.services.nebula-ui.secretsFile must be set";
}
];
# ── SOPS secrets owned by the nebula-ui service user ───────────────────
sops.secrets."${cfg.caCertSecretKey}" = {
sopsFile = cfg.secretsFile;
owner = name;
group = name;
restartUnits = [ "nebula-ui.service" ];
};
sops.secrets."${cfg.caKeySecretKey}" = {
sopsFile = cfg.secretsFile;
owner = name;
group = name;
restartUnits = [ "nebula-ui.service" ];
};
# ── User / group ────────────────────────────────────────────────────────
users.users.${name} = {
isSystemUser = true;
group = name;
description = "Nebula UI service user";
};
users.groups.${name} = { };
# ── Systemd service ─────────────────────────────────────────────────────
systemd.services.${name} = {
description = "Nebula network web UI";
wantedBy = [ "multi-user.target" ];
after = [
"network.target"
"sops-nix.service"
];
environment = {
NEBULA_UI_CA_CERT_PATH = config.sops.secrets."${cfg.caCertSecretKey}".path;
NEBULA_UI_CA_KEY_PATH = config.sops.secrets."${cfg.caKeySecretKey}".path;
NEBULA_UI_STATS_URL = "http://${statsListenAddr}";
NEBULA_UI_NETWORK_NAME = cfg.networkName;
NEBULA_UI_LISTEN_HOST = cfg.listenAddress;
NEBULA_UI_LISTEN_PORT = toString cfg.port;
};
serviceConfig = {
ExecStart = "${pkgs.${namespace}.nebula-ui}/bin/nebula-ui";
User = name;
Group = name;
Restart = "on-failure";
RestartSec = "5s";
# Hardening
NoNewPrivileges = true;
PrivateTmp = true;
ProtectSystem = "strict";
ProtectHome = true;
ReadOnlyPaths = [
config.sops.secrets."${cfg.caCertSecretKey}".path
config.sops.secrets."${cfg.caKeySecretKey}".path
];
};
};
};
};
in
{
imports = [ nebulaUiConfig ];
}
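# A hedged usage sketch for this module: host values are illustrative, the
# "pi5/nebula/..." keys are the examples from the option docs above, and
# `enable`/`port` are assumed to be supplied by mkModule's defaults.
#
#   mjallen.services.nebula-ui = {
#     enable = true;
#     caCertSecretKey = "pi5/nebula/ca-cert";
#     caKeySecretKey = "pi5/nebula/ca-key";
#     secretsFile = ./secrets/nebula.yaml;  # hypothetical path
#   };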

View File

@@ -95,6 +95,17 @@ let
host = "any";
}
] "Nebula outbound firewall rules";
# -----------------------------------------------------------------------
# Stats / metrics HTTP endpoint
# -----------------------------------------------------------------------
stats = {
enable = lib.${namespace}.mkBoolOpt false "Enable the Nebula HTTP stats endpoint";
listenAddress = lib.${namespace}.mkOpt types.str "127.0.0.1" "Address the stats endpoint binds to";
statsPort = lib.${namespace}.mkOpt types.port 8474 "Port the stats endpoint listens on";
};
};
moduleConfig = {
environment.systemPackages = with pkgs; [ nebula ];
@@ -136,6 +147,12 @@ let
inbound = cfg.inboundRules;
outbound = cfg.outboundRules;
};
settings.stats = lib.mkIf cfg.stats.enable {
type = "json";
listen = "${cfg.stats.listenAddress}:${toString cfg.stats.statsPort}";
interval = "10s";
};
};
};
};
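# Hedged enablement sketch (the mjallen module path is assumed, matching the
# assertion messages in the nebula-ui file from this commit):
#
#   mjallen.services.nebula.stats.enable = true;
#
# With the defaults above, nebula serves JSON stats on 127.0.0.1:8474, the
# address nebula-ui consumes via NEBULA_UI_STATS_URL.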

View File

@@ -149,6 +149,35 @@ let
nextcloud-setup = {
after = [ "postgresql.service" ];
requires = [ "postgresql.service" ];
serviceConfig =
let
# Extract the override.config.php store-path from the already-evaluated
# tmpfiles rules list at Nix eval time, so we never have to parse files at
# runtime. The upstream module emits exactly one rule of the form:
# "L+ <dest> - - - - <storepath>"
overrideLine = lib.findFirst (
r: lib.hasInfix "override.config.php" r
) null config.systemd.tmpfiles.rules;
overrideStorePath =
if overrideLine != null then lib.last (lib.splitString " " overrideLine) else null;
in
lib.mkIf (overrideStorePath != null) {
# systemd-tmpfiles refuses to create the override.config.php symlink because
# /media/nas/main is owned by nix-apps (not root/nextcloud), triggering an
# "unsafe path transition" error. Work around this by creating the symlink
# directly as root (the '+' prefix) before the setup script's ownership check.
# The target store path is resolved at Nix eval time so it is always current.
ExecStartPre = [
(
"+"
+ pkgs.writeShellScript "nextcloud-fix-override-config" ''
dest="${cfg.dataDir}/nextcloud/config/override.config.php"
echo "Creating symlink: $dest -> ${overrideStorePath}"
${pkgs.coreutils}/bin/ln -sf "${overrideStorePath}" "$dest"
''
)
];
};
};
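# For reference, the matched rule has the shape quoted above (dest and store
# hash illustrative):
#   "L+ /media/nas/main/nextcloud/config/override.config.php - - - - /nix/store/<hash>-override.config.php"
# so `lib.last (lib.splitString " " overrideLine)` yields the store path.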
nextcloud-update-db = {
after = [ "postgresql.service" ];