ntfy
This commit is contained in:
@@ -1,13 +1,96 @@
|
|||||||
{
|
{
|
||||||
config,
|
config,
|
||||||
lib,
|
lib,
|
||||||
|
pkgs,
|
||||||
namespace,
|
namespace,
|
||||||
...
|
...
|
||||||
}:
|
}:
|
||||||
with lib;
|
with lib;
|
||||||
let
|
let
|
||||||
inherit (lib.${namespace}) mkOpt;
|
inherit (lib.${namespace}) mkOpt mkBoolOpt;
|
||||||
cfg = config.${namespace}.power.ups;
|
cfg = config.${namespace}.power.ups;
|
||||||
|
|
||||||
|
# Script called by upsmon for every UPS event. Reads NTFY_USER and
|
||||||
|
# NTFY_PASSWORD from the environment (injected via EnvironmentFile on the
|
||||||
|
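# upsmon systemd service). The script treats its first argument as the
|
||||||
|
# event type (e.g. ONBATT, ONLINE, LOWBATT, FSD, COMMOK, COMMBAD, etc). NOTE(review): NUT documents NOTIFYTYPE (environment) as the event type and $1 as the message text — confirm.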
|
||||||
|
upsNotifyScript = pkgs.writeShellScript "ups-ntfy-notify" ''
|
||||||
|
EVENT="$1"
|
||||||
|
HOST="$(${pkgs.hostname}/bin/hostname)"
|
||||||
|
SERVER="https://ntfy.mjallen.dev"
|
||||||
|
TOPIC="ups"
|
||||||
|
|
||||||
|
case "$EVENT" in
|
||||||
|
ONBATT)
|
||||||
|
TITLE="UPS on battery: $HOST"
|
||||||
|
PRIORITY="high"
|
||||||
|
TAGS="battery,rotating_light"
|
||||||
|
MESSAGE="Power failure detected. UPS is now running on battery."
|
||||||
|
;;
|
||||||
|
ONLINE)
|
||||||
|
TITLE="UPS back on mains: $HOST"
|
||||||
|
PRIORITY="low"
|
||||||
|
TAGS="electric_plug,white_check_mark"
|
||||||
|
MESSAGE="Power restored. UPS is back on mains power."
|
||||||
|
;;
|
||||||
|
LOWBATT)
|
||||||
|
TITLE="UPS battery LOW: $HOST"
|
||||||
|
PRIORITY="urgent"
|
||||||
|
TAGS="battery,sos"
|
||||||
|
MESSAGE="UPS battery is critically low. Shutdown imminent."
|
||||||
|
;;
|
||||||
|
FSD)
|
||||||
|
TITLE="UPS forced shutdown: $HOST"
|
||||||
|
PRIORITY="urgent"
|
||||||
|
TAGS="warning,sos"
|
||||||
|
MESSAGE="Forced shutdown initiated by UPS."
|
||||||
|
;;
|
||||||
|
COMMOK)
|
||||||
|
TITLE="UPS comms restored: $HOST"
|
||||||
|
PRIORITY="low"
|
||||||
|
TAGS="electric_plug,white_check_mark"
|
||||||
|
MESSAGE="Communication with UPS restored."
|
||||||
|
;;
|
||||||
|
COMMBAD)
|
||||||
|
TITLE="UPS comms lost: $HOST"
|
||||||
|
PRIORITY="high"
|
||||||
|
TAGS="warning,rotating_light"
|
||||||
|
MESSAGE="Lost communication with UPS."
|
||||||
|
;;
|
||||||
|
SHUTDOWN)
|
||||||
|
TITLE="UPS shutdown in progress: $HOST"
|
||||||
|
PRIORITY="urgent"
|
||||||
|
TAGS="warning,sos"
|
||||||
|
MESSAGE="System is shutting down due to UPS condition."
|
||||||
|
;;
|
||||||
|
REPLBATT)
|
||||||
|
TITLE="UPS battery needs replacement: $HOST"
|
||||||
|
PRIORITY="default"
|
||||||
|
TAGS="battery,warning"
|
||||||
|
MESSAGE="UPS reports battery needs replacement."
|
||||||
|
;;
|
||||||
|
NOCOMM)
|
||||||
|
TITLE="UPS unreachable: $HOST"
|
||||||
|
PRIORITY="high"
|
||||||
|
TAGS="warning,rotating_light"
|
||||||
|
MESSAGE="UPS is not reachable."
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
TITLE="UPS event on $HOST: $EVENT"
|
||||||
|
PRIORITY="default"
|
||||||
|
TAGS="electric_plug"
|
||||||
|
MESSAGE="UPS event: $EVENT"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
${pkgs.curl}/bin/curl -sf \
|
||||||
|
--user "$NTFY_USER:$NTFY_PASSWORD" \
|
||||||
|
-H "Title: $TITLE" \
|
||||||
|
-H "Priority: $PRIORITY" \
|
||||||
|
-H "Tags: $TAGS" \
|
||||||
|
-d "$MESSAGE" \
|
||||||
|
"$SERVER/$TOPIC" || true
|
||||||
|
'';
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
options.${namespace}.power.ups = {
|
options.${namespace}.power.ups = {
|
||||||
@@ -17,6 +100,11 @@ in
|
|||||||
upsUser = mkOpt types.str "nas-admin" "Name of the ups user";
|
upsUser = mkOpt types.str "nas-admin" "Name of the ups user";
|
||||||
|
|
||||||
upsdPort = mkOpt types.int 3493 "Port for upsd";
|
upsdPort = mkOpt types.int 3493 "Port for upsd";
|
||||||
|
|
||||||
|
ntfy = {
|
||||||
|
enable = mkBoolOpt false "Send ntfy notifications on UPS events";
|
||||||
|
envFile = mkOpt types.str "" "Path to env file containing NTFY_USER and NTFY_PASSWORD";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
config = mkIf cfg.enable {
|
config = mkIf cfg.enable {
|
||||||
@@ -61,6 +149,49 @@ in
|
|||||||
passwordFile = config.sops.secrets."jallen-nas/ups_password".path;
|
passwordFile = config.sops.secrets."jallen-nas/ups_password".path;
|
||||||
user = cfg.upsUser;
|
user = cfg.upsUser;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Call the notify script for all event types we care about.
|
||||||
|
settings = mkIf cfg.ntfy.enable {
|
||||||
|
NOTIFYCMD = "${upsNotifyScript}";
|
||||||
|
NOTIFYFLAG = [
|
||||||
|
[
|
||||||
|
"ONLINE"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
[
|
||||||
|
"ONBATT"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
[
|
||||||
|
"LOWBATT"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
[
|
||||||
|
"FSD"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
[
|
||||||
|
"COMMOK"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
[
|
||||||
|
"COMMBAD"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
[
|
||||||
|
"SHUTDOWN"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
[
|
||||||
|
"REPLBATT"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
[
|
||||||
|
"NOCOMM"
|
||||||
|
"SYSLOG+WALL+EXEC"
|
||||||
|
]
|
||||||
|
];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
upsd = {
|
upsd = {
|
||||||
@@ -74,5 +205,8 @@ in
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Inject ntfy credentials into the upsmon service so the notify script
|
||||||
|
# can read NTFY_USER and NTFY_PASSWORD from the environment.
|
||||||
|
systemd.services.upsmon.serviceConfig.EnvironmentFile = mkIf cfg.ntfy.enable [ cfg.ntfy.envFile ];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,17 @@ let
|
|||||||
|
|
||||||
cfg = config.${namespace}.services.ai;
|
cfg = config.${namespace}.services.ai;
|
||||||
|
|
||||||
|
ntfyModelFailScript = pkgs.writeShellScript "update-qwen-model-notify-failure" ''
|
||||||
|
HOST="$(${pkgs.hostname}/bin/hostname)"
|
||||||
|
${pkgs.curl}/bin/curl -sf \
|
||||||
|
--user "$NTFY_USER:$NTFY_PASSWORD" \
|
||||||
|
-H "Title: Qwen model update FAILED on $HOST" \
|
||||||
|
-H "Priority: high" \
|
||||||
|
-H "Tags: rotating_light,robot_face" \
|
||||||
|
-d "The daily update-qwen-model job failed. Check: journalctl -u update-qwen-model.service" \
|
||||||
|
"https://ntfy.mjallen.dev/builds" || true
|
||||||
|
'';
|
||||||
|
|
||||||
aiConfig = lib.${namespace}.mkModule {
|
aiConfig = lib.${namespace}.mkModule {
|
||||||
inherit config;
|
inherit config;
|
||||||
name = "ai";
|
name = "ai";
|
||||||
@@ -127,11 +138,22 @@ let
|
|||||||
''}";
|
''}";
|
||||||
User = "nix-apps";
|
User = "nix-apps";
|
||||||
Group = "jallen-nas";
|
Group = "jallen-nas";
|
||||||
|
EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
|
||||||
};
|
};
|
||||||
|
unitConfig.OnFailure = "update-qwen-model-notify-failure.service";
|
||||||
# Run daily at 3 AM
|
# Run daily at 3 AM
|
||||||
startAt = "*-*-* 03:00:00";
|
startAt = "*-*-* 03:00:00";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
systemd.services.update-qwen-model-notify-failure = {
|
||||||
|
description = "Notify ntfy on update-qwen-model failure";
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
ExecStart = "${ntfyModelFailScript}";
|
||||||
|
EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
# Ensure model is available before llama-cpp starts
|
# Ensure model is available before llama-cpp starts
|
||||||
systemd.services.llama-cpp = {
|
systemd.services.llama-cpp = {
|
||||||
after = [ "update-qwen-model.service" ];
|
after = [ "update-qwen-model.service" ];
|
||||||
|
|||||||
@@ -10,6 +10,17 @@ let
|
|||||||
name = "attic";
|
name = "attic";
|
||||||
cfg = config.${namespace}.services.${name};
|
cfg = config.${namespace}.services.${name};
|
||||||
|
|
||||||
|
ntfyFailScript = pkgs.writeShellScript "nix-rebuild-cache-notify-failure" ''
|
||||||
|
HOST="$(${pkgs.hostname}/bin/hostname)"
|
||||||
|
${pkgs.curl}/bin/curl -sf \
|
||||||
|
--user "$NTFY_USER:$NTFY_PASSWORD" \
|
||||||
|
-H "Title: Nix cache rebuild FAILED on $HOST" \
|
||||||
|
-H "Priority: high" \
|
||||||
|
-H "Tags: rotating_light,nix_snowflake" \
|
||||||
|
-d "The weekly nix-rebuild-cache job failed. Check: journalctl -u nix-rebuild-cache.service" \
|
||||||
|
"https://ntfy.mjallen.dev/builds" || true
|
||||||
|
'';
|
||||||
|
|
||||||
atticConfig = lib.${namespace}.mkModule {
|
atticConfig = lib.${namespace}.mkModule {
|
||||||
inherit config name;
|
inherit config name;
|
||||||
description = "attic Service";
|
description = "attic Service";
|
||||||
@@ -60,7 +71,9 @@ let
|
|||||||
StandardError = "journal+console";
|
StandardError = "journal+console";
|
||||||
Restart = "no";
|
Restart = "no";
|
||||||
TimeoutStartSec = "2h";
|
TimeoutStartSec = "2h";
|
||||||
|
EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
|
||||||
};
|
};
|
||||||
|
unitConfig.OnFailure = "nix-rebuild-cache-notify-failure.service";
|
||||||
path = with pkgs; [
|
path = with pkgs; [
|
||||||
nix
|
nix
|
||||||
git
|
git
|
||||||
@@ -112,6 +125,15 @@ let
|
|||||||
fi;
|
fi;
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
nix-rebuild-cache-notify-failure = {
|
||||||
|
description = "Notify ntfy on nix-rebuild-cache failure";
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
ExecStart = "${ntfyFailScript}";
|
||||||
|
EnvironmentFile = [ config.sops.templates."ntfy.env".path ];
|
||||||
|
};
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
# Include timers for cache rebuilds
|
# Include timers for cache rebuilds
|
||||||
|
|||||||
@@ -6,15 +6,51 @@
|
|||||||
...
|
...
|
||||||
}:
|
}:
|
||||||
let
|
let
|
||||||
inherit (lib.${namespace}) mkOpt;
|
inherit (lib.${namespace}) mkOpt mkBoolOpt;
|
||||||
name = "crowdsec";
|
name = "crowdsec";
|
||||||
cfg = config.${namespace}.services.${name};
|
cfg = config.${namespace}.services.${name};
|
||||||
|
|
||||||
|
ntfyServer = "https://ntfy.mjallen.dev";
|
||||||
|
ntfyTopic = "crowdsec";
|
||||||
|
|
||||||
|
# CrowdSec HTTP notification plugin config — written to
|
||||||
|
# /etc/crowdsec/notifications/ntfy.yaml at runtime. Credentials are
|
||||||
|
# injected via EnvironmentFile so the plugin can reference them with
|
||||||
|
# {{env "NTFY_USER"}} / {{env "NTFY_PASSWORD"}} in the Authorization header.
|
||||||
|
ntfyPluginConfig = pkgs.writeText "crowdsec-ntfy.yaml" ''
|
||||||
|
type: http
|
||||||
|
name: ntfy_plugin
|
||||||
|
log_level: info
|
||||||
|
format: |
|
||||||
|
{{range . -}}
|
||||||
|
CrowdSec blocked: {{.Scenario}}
|
||||||
|
Source IP: {{.Source.Value}}
|
||||||
|
Country: {{.Source.Cn}}
|
||||||
|
Decisions: {{.Decisions | len}}
|
||||||
|
{{range .Decisions -}}
|
||||||
|
Action: {{.Type}} for {{.Duration}}
|
||||||
|
{{end}}
|
||||||
|
{{- end}}
|
||||||
|
url: ${ntfyServer}/${ntfyTopic}
|
||||||
|
method: POST
|
||||||
|
headers:
|
||||||
|
Title: "CrowdSec: {{(index . 0).Scenario}}"
|
||||||
|
Priority: "high"
|
||||||
|
Tags: "rotating_light,shield"
|
||||||
|
Authorization: "Basic {{b64enc (print (env "NTFY_USER") ":" (env "NTFY_PASSWORD"))}}"
|
||||||
|
skip_tls_verify: false
|
||||||
|
timeout: 10s
|
||||||
|
'';
|
||||||
|
|
||||||
crowdsecConfig = lib.${namespace}.mkModule {
|
crowdsecConfig = lib.${namespace}.mkModule {
|
||||||
inherit config name;
|
inherit config name;
|
||||||
description = "crowdsec";
|
description = "crowdsec";
|
||||||
options = with lib; {
|
options = with lib; {
|
||||||
apiKey = mkOpt types.str "" "API key for crowdsec bouncer";
|
apiKey = mkOpt types.str "" "API key for crowdsec bouncer";
|
||||||
|
ntfy = {
|
||||||
|
enable = mkBoolOpt false "Send ntfy notifications on new CrowdSec alerts";
|
||||||
|
envFile = mkOpt types.str "" "Path to env file containing NTFY_USER and NTFY_PASSWORD";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
moduleConfig = {
|
moduleConfig = {
|
||||||
services = {
|
services = {
|
||||||
@@ -199,6 +235,57 @@ let
|
|||||||
user = "crowdsec";
|
user = "crowdsec";
|
||||||
group = "crowdsec";
|
group = "crowdsec";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ntfy notifications via the CrowdSec HTTP notification plugin
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Drop the plugin config YAML into /etc/crowdsec/notifications/.
|
||||||
|
# CrowdSec scans this directory on startup and registers any plugin
|
||||||
|
# config files it finds.
|
||||||
|
environment.etc."crowdsec/notifications/ntfy.yaml" = lib.mkIf cfg.ntfy.enable {
|
||||||
|
source = ntfyPluginConfig;
|
||||||
|
mode = "0440";
|
||||||
|
user = "crowdsec";
|
||||||
|
group = "crowdsec";
|
||||||
|
};
|
||||||
|
|
||||||
|
# CrowdSec profiles.yaml: route every alert to the ntfy plugin.
|
||||||
|
# This replaces the default "do nothing" profile.
|
||||||
|
environment.etc."crowdsec/profiles.yaml" = lib.mkIf cfg.ntfy.enable {
|
||||||
|
text = ''
|
||||||
|
name: default_ip_remediation
|
||||||
|
filters:
|
||||||
|
- Alert.Remediation == true && Alert.GetScope() == "Ip"
|
||||||
|
decisions:
|
||||||
|
- type: ban
|
||||||
|
duration: 4h
|
||||||
|
notifications:
|
||||||
|
- ntfy_plugin
|
||||||
|
on_success: break
|
||||||
|
---
|
||||||
|
name: default_range_remediation
|
||||||
|
filters:
|
||||||
|
- Alert.Remediation == true && Alert.GetScope() == "Range"
|
||||||
|
decisions:
|
||||||
|
- type: ban
|
||||||
|
duration: 4h
|
||||||
|
notifications:
|
||||||
|
- ntfy_plugin
|
||||||
|
on_success: break
|
||||||
|
'';
|
||||||
|
mode = "0440";
|
||||||
|
user = "crowdsec";
|
||||||
|
group = "crowdsec";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Inject NTFY_USER and NTFY_PASSWORD into the crowdsec service so the
|
||||||
|
# HTTP plugin template can reference them. The plugin config builds the
|
||||||
|
# Authorization: Basic header itself by base64-encoding
|
||||||
|
# "NTFY_USER:NTFY_PASSWORD" with b64enc inside the header template.
|
||||||
|
systemd.services.crowdsec.serviceConfig.EnvironmentFile = lib.mkIf cfg.ntfy.enable [
|
||||||
|
cfg.ntfy.envFile
|
||||||
|
];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
in
|
in
|
||||||
|
|||||||
@@ -151,7 +151,7 @@ let
|
|||||||
'';
|
'';
|
||||||
|
|
||||||
giteaPort = config.${namespace}.services.gitea.port;
|
giteaPort = config.${namespace}.services.gitea.port;
|
||||||
resticPort = config.${namespace}.services.restic.port;
|
resticPort = config.${namespace}.services.restic-server.port;
|
||||||
nextcloudPort = config.${namespace}.services.nextcloud.port;
|
nextcloudPort = config.${namespace}.services.nextcloud.port;
|
||||||
|
|
||||||
grafanaConfig = lib.${namespace}.mkModule {
|
grafanaConfig = lib.${namespace}.mkModule {
|
||||||
@@ -392,27 +392,462 @@ let
|
|||||||
httpMethod: POST
|
httpMethod: POST
|
||||||
timeInterval: 15s
|
timeInterval: 15s
|
||||||
'';
|
'';
|
||||||
# Provide empty-but-valid alerting provisioning documents.
|
# ---------------------------------------------------------------------------
|
||||||
# Without these, the NixOS module serialises `null` YAML which
|
# Alerting provisioning
|
||||||
# Grafana 12's provisioner fails to parse, producing a spurious
|
# ---------------------------------------------------------------------------
|
||||||
# "data source not found" error at startup.
|
|
||||||
alerting = {
|
alerting = {
|
||||||
rules.settings = {
|
# ── Contact points ──────────────────────────────────────────────────
|
||||||
apiVersion = 1;
|
# ntfy via the Grafana webhook contact point. Grafana POSTs a JSON
|
||||||
groups = [ ];
|
# body; ntfy accepts any body as the message text. We use the
|
||||||
};
|
# message template below to format it nicely.
|
||||||
|
# Basic auth credentials are read from the SOPS secret at runtime
|
||||||
|
# via Grafana's $__file{} provider.
|
||||||
contactPoints.settings = {
|
contactPoints.settings = {
|
||||||
apiVersion = 1;
|
apiVersion = 1;
|
||||||
contactPoints = [ ];
|
contactPoints = [
|
||||||
};
|
{
|
||||||
policies.settings = {
|
name = "ntfy";
|
||||||
apiVersion = 1;
|
receivers = [
|
||||||
policies = [ ];
|
{
|
||||||
|
uid = "ntfy-webhook";
|
||||||
|
type = "webhook";
|
||||||
|
settings = {
|
||||||
|
url = "https://ntfy.mjallen.dev/grafana-alerts";
|
||||||
|
httpMethod = "POST";
|
||||||
|
username = "$__file{${config.sops.secrets."jallen-nas/ntfy/user".path}}";
|
||||||
|
password = "$__file{${config.sops.secrets."jallen-nas/ntfy/password".path}}";
|
||||||
|
# Static ntfy headers sent with every notification. Only Tags is set
|
||||||
|
# here; the "ntfy_message" template below is not referenced by this receiver.
|
||||||
|
httpHeaders = {
|
||||||
|
"Tags" = "chart,bell";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
disableResolveMessage = false;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
}
|
||||||
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# ── Notification message template ───────────────────────────────────
|
||||||
|
# Grafana sends the rendered template body as the POST body.
|
||||||
|
# ntfy treats the body as the message text.
|
||||||
templates.settings = {
|
templates.settings = {
|
||||||
apiVersion = 1;
|
apiVersion = 1;
|
||||||
templates = [ ];
|
templates = [
|
||||||
|
{
|
||||||
|
name = "ntfy_message";
|
||||||
|
template = ''
|
||||||
|
{{ define "ntfy_message" -}}
|
||||||
|
{{ .CommonAnnotations.summary | default .GroupLabels.alertname }}
|
||||||
|
{{ range .Alerts -}}
|
||||||
|
Status: {{ .Status | title }}
|
||||||
|
Alert: {{ .Labels.alertname }}
|
||||||
|
Severity: {{ .Labels.severity | default "unknown" }}
|
||||||
|
Instance: {{ .Labels.instance | default "unknown" }}
|
||||||
|
{{ if .Annotations.description -}}
|
||||||
|
Details: {{ .Annotations.description }}
|
||||||
|
{{ end -}}
|
||||||
|
{{ end -}}
|
||||||
|
{{ end }}
|
||||||
|
'';
|
||||||
|
}
|
||||||
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# ── Notification routing policy ─────────────────────────────────────
|
||||||
|
policies.settings = {
|
||||||
|
apiVersion = 1;
|
||||||
|
policies = [
|
||||||
|
{
|
||||||
|
receiver = "ntfy";
|
||||||
|
group_by = [
|
||||||
|
"alertname"
|
||||||
|
"severity"
|
||||||
|
];
|
||||||
|
group_wait = "30s";
|
||||||
|
group_interval = "5m";
|
||||||
|
repeat_interval = "4h";
|
||||||
|
routes = [
|
||||||
|
# Critical alerts: repeat every 1h, no grouping wait
|
||||||
|
{
|
||||||
|
receiver = "ntfy";
|
||||||
|
matchers = [ "severity = critical" ];
|
||||||
|
group_wait = "0s";
|
||||||
|
repeat_interval = "1h";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
# ── Alert rules ─────────────────────────────────────────────────────
|
||||||
|
rules.settings = {
|
||||||
|
apiVersion = 1;
|
||||||
|
groups = [
|
||||||
|
{
|
||||||
|
name = "nas-system";
|
||||||
|
folder = "NAS Alerts";
|
||||||
|
interval = "1m";
|
||||||
|
rules = [
|
||||||
|
# Disk usage > 85% (warning). No separate critical-threshold rule is defined in this group.
|
||||||
|
{
|
||||||
|
uid = "nas-disk-warning";
|
||||||
|
title = "Disk usage high";
|
||||||
|
condition = "C";
|
||||||
|
data = [
|
||||||
|
{
|
||||||
|
refId = "A";
|
||||||
|
datasourceUid = "prometheus";
|
||||||
|
model = {
|
||||||
|
expr = ''
|
||||||
|
(
|
||||||
|
node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
|
||||||
|
- node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
|
||||||
|
)
|
||||||
|
/ node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs",mountpoint!~"/boot.*"}
|
||||||
|
* 100
|
||||||
|
'';
|
||||||
|
intervalMs = 60000;
|
||||||
|
maxDataPoints = 43200;
|
||||||
|
refId = "A";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "B";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "reduce";
|
||||||
|
refId = "B";
|
||||||
|
expression = "A";
|
||||||
|
reducer = "last";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "C";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "threshold";
|
||||||
|
refId = "C";
|
||||||
|
expression = "B";
|
||||||
|
conditions = [
|
||||||
|
{
|
||||||
|
evaluator = {
|
||||||
|
type = "gt";
|
||||||
|
params = [ 85 ];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
noDataState = "NoData";
|
||||||
|
execErrState = "Error";
|
||||||
|
for = "5m";
|
||||||
|
annotations = {
|
||||||
|
summary = "Disk usage above 85%";
|
||||||
|
description = "Filesystem {{ $labels.mountpoint }} is {{ $values.B | printf \"%.1f\" }}% full.";
|
||||||
|
};
|
||||||
|
labels = {
|
||||||
|
severity = "warning";
|
||||||
|
};
|
||||||
|
isPaused = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Memory usage > 90%
|
||||||
|
{
|
||||||
|
uid = "nas-memory-high";
|
||||||
|
title = "Memory usage high";
|
||||||
|
condition = "C";
|
||||||
|
data = [
|
||||||
|
{
|
||||||
|
refId = "A";
|
||||||
|
datasourceUid = "prometheus";
|
||||||
|
model = {
|
||||||
|
expr = ''
|
||||||
|
(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100
|
||||||
|
'';
|
||||||
|
intervalMs = 60000;
|
||||||
|
maxDataPoints = 43200;
|
||||||
|
refId = "A";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "B";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "reduce";
|
||||||
|
refId = "B";
|
||||||
|
expression = "A";
|
||||||
|
reducer = "last";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "C";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "threshold";
|
||||||
|
refId = "C";
|
||||||
|
expression = "B";
|
||||||
|
conditions = [
|
||||||
|
{
|
||||||
|
evaluator = {
|
||||||
|
type = "gt";
|
||||||
|
params = [ 90 ];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
noDataState = "NoData";
|
||||||
|
execErrState = "Error";
|
||||||
|
for = "5m";
|
||||||
|
annotations = {
|
||||||
|
summary = "Memory usage above 90%";
|
||||||
|
description = "Memory usage is {{ $values.B | printf \"%.1f\" }}%.";
|
||||||
|
};
|
||||||
|
labels = {
|
||||||
|
severity = "warning";
|
||||||
|
};
|
||||||
|
isPaused = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
# CPU > 90% sustained for 10m
|
||||||
|
{
|
||||||
|
uid = "nas-cpu-high";
|
||||||
|
title = "CPU usage sustained high";
|
||||||
|
condition = "C";
|
||||||
|
data = [
|
||||||
|
{
|
||||||
|
refId = "A";
|
||||||
|
datasourceUid = "prometheus";
|
||||||
|
model = {
|
||||||
|
expr = ''
|
||||||
|
100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
|
||||||
|
'';
|
||||||
|
intervalMs = 60000;
|
||||||
|
maxDataPoints = 43200;
|
||||||
|
refId = "A";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "B";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "reduce";
|
||||||
|
refId = "B";
|
||||||
|
expression = "A";
|
||||||
|
reducer = "last";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "C";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "threshold";
|
||||||
|
refId = "C";
|
||||||
|
expression = "B";
|
||||||
|
conditions = [
|
||||||
|
{
|
||||||
|
evaluator = {
|
||||||
|
type = "gt";
|
||||||
|
params = [ 90 ];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
noDataState = "NoData";
|
||||||
|
execErrState = "Error";
|
||||||
|
for = "10m";
|
||||||
|
annotations = {
|
||||||
|
summary = "CPU sustained above 90%";
|
||||||
|
description = "CPU usage has been above 90% for 10 minutes (currently {{ $values.B | printf \"%.1f\" }}%).";
|
||||||
|
};
|
||||||
|
labels = {
|
||||||
|
severity = "warning";
|
||||||
|
};
|
||||||
|
isPaused = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
# UPS on battery (network_ups_tools_ups_status == 0 means OB/on-battery)
|
||||||
|
{
|
||||||
|
uid = "nas-ups-onbatt";
|
||||||
|
title = "UPS on battery";
|
||||||
|
condition = "C";
|
||||||
|
data = [
|
||||||
|
{
|
||||||
|
refId = "A";
|
||||||
|
datasourceUid = "prometheus";
|
||||||
|
model = {
|
||||||
|
expr = "network_ups_tools_ups_status";
|
||||||
|
intervalMs = 60000;
|
||||||
|
maxDataPoints = 43200;
|
||||||
|
refId = "A";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "B";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "reduce";
|
||||||
|
refId = "B";
|
||||||
|
expression = "A";
|
||||||
|
reducer = "last";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "C";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "threshold";
|
||||||
|
refId = "C";
|
||||||
|
expression = "B";
|
||||||
|
# status 0 = OB (on battery), 1 = OL (online)
|
||||||
|
conditions = [
|
||||||
|
{
|
||||||
|
evaluator = {
|
||||||
|
type = "lt";
|
||||||
|
params = [ 1 ];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
noDataState = "NoData";
|
||||||
|
execErrState = "Error";
|
||||||
|
for = "1m";
|
||||||
|
annotations = {
|
||||||
|
summary = "UPS is running on battery";
|
||||||
|
description = "Mains power failure detected. UPS battery charge: {{ with query \"network_ups_tools_battery_charge\" }}{{ . | first | value | printf \"%.0f\" }}%{{ end }}.";
|
||||||
|
};
|
||||||
|
labels = {
|
||||||
|
severity = "critical";
|
||||||
|
};
|
||||||
|
isPaused = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
# UPS battery charge < 30%
|
||||||
|
{
|
||||||
|
uid = "nas-ups-lowbatt";
|
||||||
|
title = "UPS battery low";
|
||||||
|
condition = "C";
|
||||||
|
data = [
|
||||||
|
{
|
||||||
|
refId = "A";
|
||||||
|
datasourceUid = "prometheus";
|
||||||
|
model = {
|
||||||
|
expr = "network_ups_tools_battery_charge";
|
||||||
|
intervalMs = 60000;
|
||||||
|
maxDataPoints = 43200;
|
||||||
|
refId = "A";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "B";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "reduce";
|
||||||
|
refId = "B";
|
||||||
|
expression = "A";
|
||||||
|
reducer = "last";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "C";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "threshold";
|
||||||
|
refId = "C";
|
||||||
|
expression = "B";
|
||||||
|
conditions = [
|
||||||
|
{
|
||||||
|
evaluator = {
|
||||||
|
type = "lt";
|
||||||
|
params = [ 30 ];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
noDataState = "NoData";
|
||||||
|
execErrState = "Error";
|
||||||
|
for = "2m";
|
||||||
|
annotations = {
|
||||||
|
summary = "UPS battery charge below 30%";
|
||||||
|
description = "UPS battery is at {{ $values.B | printf \"%.0f\" }}%. Shutdown may be imminent.";
|
||||||
|
};
|
||||||
|
labels = {
|
||||||
|
severity = "critical";
|
||||||
|
};
|
||||||
|
isPaused = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
# PostgreSQL not responding
|
||||||
|
{
|
||||||
|
uid = "nas-postgres-down";
|
||||||
|
title = "PostgreSQL down";
|
||||||
|
condition = "C";
|
||||||
|
data = [
|
||||||
|
{
|
||||||
|
refId = "A";
|
||||||
|
datasourceUid = "prometheus";
|
||||||
|
model = {
|
||||||
|
expr = "pg_up";
|
||||||
|
intervalMs = 60000;
|
||||||
|
maxDataPoints = 43200;
|
||||||
|
refId = "A";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "B";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "reduce";
|
||||||
|
refId = "B";
|
||||||
|
expression = "A";
|
||||||
|
reducer = "last";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
{
|
||||||
|
refId = "C";
|
||||||
|
datasourceUid = "__expr__";
|
||||||
|
model = {
|
||||||
|
type = "threshold";
|
||||||
|
refId = "C";
|
||||||
|
expression = "B";
|
||||||
|
conditions = [
|
||||||
|
{
|
||||||
|
evaluator = {
|
||||||
|
type = "lt";
|
||||||
|
params = [ 1 ];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
noDataState = "Alerting";
|
||||||
|
execErrState = "Error";
|
||||||
|
for = "2m";
|
||||||
|
annotations = {
|
||||||
|
summary = "PostgreSQL is down";
|
||||||
|
description = "The PostgreSQL exporter reports pg_up=0. Database may be unavailable.";
|
||||||
|
};
|
||||||
|
labels = {
|
||||||
|
severity = "critical";
|
||||||
|
};
|
||||||
|
isPaused = false;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
muteTimings.settings = {
|
muteTimings.settings = {
|
||||||
apiVersion = 1;
|
apiVersion = 1;
|
||||||
muteTimes = [ ];
|
muteTimes = [ ];
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -87,6 +87,10 @@ in
|
|||||||
enable = true;
|
enable = true;
|
||||||
port = 8181;
|
port = 8181;
|
||||||
apiKey = config.sops.secrets."jallen-nas/crowdsec-capi".path;
|
apiKey = config.sops.secrets."jallen-nas/crowdsec-capi".path;
|
||||||
|
ntfy = {
|
||||||
|
enable = true;
|
||||||
|
envFile = config.sops.templates."ntfy.env".path;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
dispatcharr = {
|
dispatcharr = {
|
||||||
enable = false;
|
enable = false;
|
||||||
@@ -208,7 +212,7 @@ in
|
|||||||
smtpPort = 1025;
|
smtpPort = 1025;
|
||||||
imapPort = 1143;
|
imapPort = 1143;
|
||||||
};
|
};
|
||||||
restic = {
|
restic-server = {
|
||||||
enable = true;
|
enable = true;
|
||||||
port = 8008;
|
port = 8008;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -192,7 +192,13 @@ in
|
|||||||
# # Power # #
|
# # Power # #
|
||||||
# ###################################################
|
# ###################################################
|
||||||
|
|
||||||
power.ups = enabled;
|
power.ups = {
|
||||||
|
enable = true;
|
||||||
|
ntfy = {
|
||||||
|
enable = true;
|
||||||
|
envFile = config.sops.templates."ntfy.env".path;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
# ###################################################
|
# ###################################################
|
||||||
# # Samba # #
|
# # Samba # #
|
||||||
@@ -304,31 +310,34 @@ in
|
|||||||
|
|
||||||
# Configure environment
|
# Configure environment
|
||||||
environment = {
|
environment = {
|
||||||
systemPackages = with pkgs; [
|
systemPackages =
|
||||||
attic-client
|
with pkgs;
|
||||||
bcachefs-tools
|
[
|
||||||
cryptsetup
|
attic-client
|
||||||
clevis
|
bcachefs-tools
|
||||||
deconz
|
cryptsetup
|
||||||
duperemove
|
clevis
|
||||||
efibootmgr
|
deconz
|
||||||
ffmpeg
|
duperemove
|
||||||
ipset
|
efibootmgr
|
||||||
keyutils
|
ffmpeg
|
||||||
nut
|
ipset
|
||||||
packagekit
|
keyutils
|
||||||
pass
|
nut
|
||||||
protonmail-bridge
|
packagekit
|
||||||
protonvpn-gui
|
pass
|
||||||
qrencode
|
protonmail-bridge
|
||||||
sbctl
|
protonvpn-gui
|
||||||
systemctl-tui
|
qrencode
|
||||||
tigervnc
|
sbctl
|
||||||
tpm2-tools
|
systemctl-tui
|
||||||
tpm2-tss
|
tigervnc
|
||||||
] ++ (with pkgs.${namespace}; [
|
tpm2-tools
|
||||||
nebula-sign-cert
|
tpm2-tss
|
||||||
]);
|
]
|
||||||
|
++ (with pkgs.${namespace}; [
|
||||||
|
nebula-sign-cert
|
||||||
|
]);
|
||||||
persistence."/media/nas/main/persist" = {
|
persistence."/media/nas/main/persist" = {
|
||||||
hideMounts = true;
|
hideMounts = true;
|
||||||
directories = [
|
directories = [
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ in
|
|||||||
paperless = mkForce disabled;
|
paperless = mkForce disabled;
|
||||||
paperless-ai = mkForce disabled;
|
paperless-ai = mkForce disabled;
|
||||||
protonmail-bridge = mkForce disabled;
|
protonmail-bridge = mkForce disabled;
|
||||||
restic = mkForce disabled;
|
restic-server = mkForce disabled;
|
||||||
sunshine = mkForce disabled;
|
sunshine = mkForce disabled;
|
||||||
tdarr = mkForce disabled;
|
tdarr = mkForce disabled;
|
||||||
unmanic = mkForce disabled;
|
unmanic = mkForce disabled;
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ in
|
|||||||
"paperless"
|
"paperless"
|
||||||
"paperless-ai"
|
"paperless-ai"
|
||||||
"protonmail-bridge"
|
"protonmail-bridge"
|
||||||
"restic"
|
"restic-server"
|
||||||
"sparky-fitness"
|
"sparky-fitness"
|
||||||
"sparky-fitness-server"
|
"sparky-fitness-server"
|
||||||
"sunshine"
|
"sunshine"
|
||||||
|
|||||||
@@ -259,6 +259,28 @@ in
|
|||||||
"jallen-nas/ntfy/auth-users" = {
|
"jallen-nas/ntfy/auth-users" = {
|
||||||
sopsFile = defaultSops;
|
sopsFile = defaultSops;
|
||||||
};
|
};
|
||||||
|
"jallen-nas/ntfy/user" = {
|
||||||
|
sopsFile = defaultSops;
|
||||||
|
mode = "0440";
|
||||||
|
owner = "grafana";
|
||||||
|
group = "keys";
|
||||||
|
restartUnits = [
|
||||||
|
"grafana.service"
|
||||||
|
"crowdsec.service"
|
||||||
|
"upsmon.service"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
"jallen-nas/ntfy/password" = {
|
||||||
|
sopsFile = defaultSops;
|
||||||
|
mode = "0440";
|
||||||
|
owner = "grafana";
|
||||||
|
group = "keys";
|
||||||
|
restartUnits = [
|
||||||
|
"grafana.service"
|
||||||
|
"crowdsec.service"
|
||||||
|
"upsmon.service"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
# sparky-fitness
|
# sparky-fitness
|
||||||
@@ -330,6 +352,20 @@ in
|
|||||||
restartUnits = [ "podman-authenticRac.service" ];
|
restartUnits = [ "podman-authenticRac.service" ];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
"ntfy.env" = {
|
||||||
|
content = ''
|
||||||
|
NTFY_USER=${config.sops.placeholder."jallen-nas/ntfy/user"}
|
||||||
|
NTFY_PASSWORD=${config.sops.placeholder."jallen-nas/ntfy/password"}
|
||||||
|
'';
|
||||||
|
mode = "0600";
|
||||||
|
restartUnits = [
|
||||||
|
"crowdsec.service"
|
||||||
|
"upsmon.service"
|
||||||
|
"nix-rebuild-cache.service"
|
||||||
|
"update-qwen-model.service"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
"paperless.env" = {
|
"paperless.env" = {
|
||||||
content = ''
|
content = ''
|
||||||
PAPERLESS_ADMIN_USER = "mjallen"
|
PAPERLESS_ADMIN_USER = "mjallen"
|
||||||
|
|||||||
Reference in New Issue
Block a user