# NixOS module for the Lemonade local LLM server.
#
# Declares the `${namespace}.services.lemonade` options (via the project's
# mkModule helper) and, through moduleConfig, installs the package, defines a
# sandboxed systemd service, and creates a dedicated system user and group.
{
  config,
  lib,
  pkgs,
  namespace,
  ...
}:
let
  inherit (lib) mkForce getExe;
  inherit (lib.${namespace}) mkModule mkOpt;

  name = "lemonade";
  cfg = config.${namespace}.services.${name};

  # Locations managed by systemd through StateDirectory / RuntimeDirectory.
  stateDir = "/var/lib/${name}";
  runtimeDir = "/run/${name}";

  # Arguments derived from the typed options, kept as a real argv list (one
  # element per argument) so each value can be quoted individually.
  structuredArgs =
    [
      "serve"
      "--no-tray"
      "--port"
      (toString cfg.port)
      "--host"
      cfg.host
      "--log-level"
      cfg.logLevel
    ]
    # Only emitted when the value differs from 1.
    ++ lib.optionals (cfg.maxLoadedModels != 1) [
      "--max-loaded-models"
      (toString cfg.maxLoadedModels)
    ]
    ++ lib.optionals (cfg.extraModelsDir != null) [
      "--extra-models-dir"
      cfg.extraModelsDir
    ];

  # lemonade-server serve args built from config options.
  # The option-derived arguments are quoted so values containing whitespace
  # (e.g. an extraModelsDir path) remain a single argument in ExecStart.
  # extraArgs is still appended verbatim, so existing entries that pack a
  # flag and its value into one string keep working.
  serveArgs = lib.concatStringsSep " " ([ (lib.escapeShellArgs structuredArgs) ] ++ cfg.extraArgs);

  # True when modelsDir is the state directory itself or lives beneath it.
  # Trailing slashes are added so "/var/lib/lemonade-x" does not match.
  modelsDirInStateDir = lib.hasPrefix "${stateDir}/" "${cfg.modelsDir}/";

  lemonadeConfig = mkModule {
    inherit config name;
    description = "Lemonade local LLM server";

    options = {
      # Override mkModule's default port of 80 with lemonade's actual default.
      # types.port rejects values outside 0-65535 at evaluation time.
      port = mkOpt lib.types.port 8000 "Port lemonade-router listens on";
      host = mkOpt lib.types.str "127.0.0.1" "Address lemonade-router binds to";
      logLevel = mkOpt (lib.types.enum [
        "critical"
        "error"
        "warning"
        "info"
        "debug"
        "trace"
      ]) "info" "Log level for lemonade-router";
      maxLoadedModels =
        mkOpt lib.types.int 1
          "Maximum number of models to keep loaded simultaneously";
      extraModelsDir =
        mkOpt (lib.types.nullOr lib.types.str) null
          "Extra directory scanned for local GGUF model files";
      extraArgs =
        mkOpt (lib.types.listOf lib.types.str) [ ]
          "Extra arguments passed verbatim to lemonade-server serve";
      modelsDir =
        mkOpt lib.types.str "/var/lib/${name}/models"
          "Directory where downloaded models are stored (exposed as HF_HOME)";
      apiKeyFile =
        mkOpt (lib.types.nullOr lib.types.str) null
          "Path to a file containing the LEMONADE_API_KEY (e.g. a sops secret path)";
    };

    moduleConfig = {
      # Install the package system-wide so lemonade-server / lemonade-router are
      # available in PATH for interactive use alongside the daemon.
      environment.systemPackages = [ pkgs.${namespace}.lemonade ];

      systemd.services.${name} = {
        description = "Lemonade local LLM server";
        wantedBy = [ "multi-user.target" ];
        after = [
          "network.target"
          "network-online.target"
        ];
        wants = [ "network-online.target" ];

        # lemonade-server discovers lemonade-router by reading /proc/self/exe,
        # so we must use ExecStart with the real binary, not a shell wrapper.
        serviceConfig = {
          Type = "simple";
          ExecStart = "${getExe pkgs.${namespace}.lemonade} ${serveArgs}";

          # Run as the static system user declared below.
          User = name;
          Group = name;
          DynamicUser = mkForce false;

          # Models and HuggingFace cache land under modelsDir.
          # HF_HOME overrides the default ~/.cache/huggingface location.
          Environment = [
            "HF_HOME=${cfg.modelsDir}"
            "XDG_RUNTIME_DIR=${runtimeDir}"
          ];

          # Load an API key from a secrets file if provided.
          EnvironmentFile = lib.optional (cfg.apiKeyFile != null) cfg.apiKeyFile;

          # Runtime directory for PID file / lock file (created automatically
          # by systemd and owned by the service user).
          RuntimeDirectory = name;
          RuntimeDirectoryMode = "0755";

          # Persistent state: models cache.
          StateDirectory = name;
          StateDirectoryMode = "0750";

          # Home directory for the service user (needed by some HF tooling).
          WorkingDirectory = stateDir;

          Restart = "on-failure";
          RestartSec = "5s";

          StandardOutput = "journal";
          StandardError = "journal";
          SyslogIdentifier = name;

          # Hardening — lemonade needs network access and subprocess execution
          # for spawning llama.cpp / whisper.cpp backends.
          NoNewPrivileges = true;
          PrivateTmp = true;
          ProtectSystem = "strict";
          ProtectHome = true;
          ReadWritePaths =
            [
              stateDir
              runtimeDir
            ]
            # ProtectSystem=strict leaves everything else read-only, so a
            # modelsDir outside the state directory must be listed explicitly.
            # The "-" prefix makes systemd ignore the entry when the path does
            # not exist instead of failing to set up the mount namespace.
            ++ lib.optional (!modelsDirInStateDir) "-${cfg.modelsDir}";
        };
      };

      # Static service account; its home is the service's state directory.
      users.users.${name} = {
        isSystemUser = true;
        group = name;
        home = stateDir;
        createHome = true;
        description = "Lemonade LLM server daemon";
      };

      users.groups.${name} = { };
    };
  };
in
{
  imports = [ lemonadeConfig ];
}