Skip to content

Commit

Permalink
Use nix to generate declarative systemd units
Browse files Browse the repository at this point in the history
  • Loading branch information
m1cr0man committed Jul 6, 2023
1 parent f7c2f45 commit b69c75c
Show file tree
Hide file tree
Showing 4 changed files with 355 additions and 9 deletions.
30 changes: 24 additions & 6 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions nixos_nspawn/nix/containers-next/container-options.nix
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,9 @@ in
NixOS configuration for the container. See {manpage}`configuration.nix(5)` for available options.
'';
default = { };
type = lib.mkOptionType {
name = "NixOS configuration";
merge = lib.const (map (x: rec { imports = [ x.value ]; key = _file; _file = x.file; }));
# TODO figure out why the custom type breaks recursive evaluation
# for the imperative host nspawn unit
type = types.attrs;
};

timeoutStartSec = mkOption {
Expand Down
294 changes: 294 additions & 0 deletions nixos_nspawn/nix/containers-next/hypervisor.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
{ pkgs, lib, config, ... }:

with lib;

let
# Shorthand for the declared container instances (`nixos.containers.instances`).
cfg = config.nixos.containers.instances;

# Render a boolean as the literal "yes"/"no" string.
yesNo = flag: if flag then "yes" else "no";

# Host-side interface name prefix: "ve" for a veth pair, "vz" for every
# other type (i.e. zones).
ifacePrefix = kind: if kind != "veth" then "vz" else "ve";

# True when an instance has no network section at all, or when both its
# IPv4 and IPv6 address pools are empty.
dynamicAddrsDisabled = inst:
  let
    net = inst.network;
  in
  net == null || builtins.all (pool: pool == []) [ net.v4.addrPool net.v6.addrPool ];

# Render one radvd `interface` stanza.
#   type   - "veth" or "zone" (anything else fails the assertion)
#   name   - container or zone name; combined with the prefix from
#            `ifacePrefix` to form the interface name
#   v6Pool - list of IPv6 prefixes, each emitted as a `prefix` sub-stanza
mkRadvdSection = type: name: v6Pool:
  assert elem type [ "veth" "zone" ];
  let
    iface = "${ifacePrefix type}-${name}";
    prefixStanzas = concatMapStrings
      (prefix: ''
        prefix ${prefix} {
        AdvOnLink on;
        AdvAutonomous on;
        };
      '')
      v6Pool;
  in
  ''
    interface ${iface} {
    AdvSendAdvert on;
    ${prefixStanzas}
    };
  '';

# Shorthand for the declared networking zones (`nixos.containers.zones`).
zoneCfg = config.nixos.containers.zones;

# Names of all declared container instances and zones.
interfaces.containers = attrNames cfg;
interfaces.zones = attrNames zoneCfg;

# Definition for `services.radvd`: one `interface` stanza per container with
# its own veth pair (has a network section, is not in a zone) and one per zone.
radvd =
  let
    # Containers that get a dedicated host-side veth interface.
    vethContainers = filter
      (n: cfg.${n}.network != null && cfg.${n}.zone == null)
      interfaces.containers;

    # Whether at least one interface stanza is generated below.
    hasInterfaces = vethContainers != [] || interfaces.zones != [];
  in
  {
    # Only enable radvd when there is something to advertise on. The previous
    # condition tested `attrNames cfg != []`, which always holds wherever this
    # value is used (inside `mkIf (cfg != {})`), so radvd was also enabled
    # with an empty configuration when all containers use the host network
    # and no zone exists. `mkIf … true` (instead of a plain boolean) avoids
    # contributing a conflicting `false` definition in that case.
    enable = mkIf hasInterfaces true;
    config = concatStringsSep "\n" [
      (concatMapStrings
        (x: mkRadvdSection "veth" x cfg.${x}.network.v6.addrPool)
        vethContainers)
      (concatMapStrings
        (x: mkRadvdSection "zone" x zoneCfg.${x}.v6.addrPool)
        interfaces.zones)
    ];
  };

# `matchConfig` for the host-side interface of a container ("veth") or a
# zone ("zone"); any other type fails the assertion.
mkMatchCfg = type: name:
  assert elem type [ "veth" "zone" ];
  let
    isVeth = type == "veth";
  in
  {
    Name = "${ifacePrefix type}-${name}";
    Driver = if isVeth then "veth" else "bridge";
  };

# `networkConfig` for a host-side container/zone interface.
#   dhcp           - whether to set `DHCPServer` to "yes"
#   v4Nat, v6Nat   - booleans selecting the `IPMasquerade` value
mkNetworkCfg = dhcp: { v4Nat, v6Nat }:
  let
    masquerade =
      if !(v4Nat || v6Nat) then "no"
      else if !v6Nat then "ipv4"
      else if !v4Nat then "ipv6"
      else "both";
  in
  {
    LinkLocalAddressing = mkDefault "ipv6";
    DHCPServer = yesNo dhcp;
    IPMasquerade = masquerade;
    IPForward = "yes";
    LLDP = "yes";
    EmitLLDP = "customer-bridge";
    IPv6AcceptRA = "no";
  };

# Three-way `recursiveUpdate`: `middle` is updated with `top` first, then
# `base` is updated with that result.
recUpdate3 = base: middle: top:
  recursiveUpdate base (recursiveUpdate middle top);

# Helpers imported from the sibling file `shared.nix`.
shared = import ./shared.nix { inherit lib; };

mkNetworkingOpts = shared.mkNetworkingOpts;

# Pair an instance's option values (`config`) with its `system-config`
# value (`container`). The instance name is not used.
mkImage = _name: instance: {
  container = instance.system-config;
  config = instance;
};

# Build the `systemd.nspawn.<name>` definition for one image record as
# produced by `mkImage` (`{ container, config }`).
mkContainer = image:
  let
    instance = image.config;
    toplevel = image.container.config.system.build.toplevel;

    hasPrivateNetwork = instance.zone != null || instance.network != null;
    sharesDaemonSocket = instance.sharedNix && instance.mountDaemonSocket;
  in
  mkMerge [
    {
      execConfig = mkMerge [
        {
          Boot = false;
          Parameters = "${toplevel}/init";
          Ephemeral = yesNo instance.ephemeral;
          KillSignal = "SIGRTMIN+3";
          # X-ActivationStrategy = instance.activation.strategy;
          PrivateUsers = mkDefault "pick";
        }
        # LinkJournal is only set for non-ephemeral containers.
        (mkIf (!instance.ephemeral) {
          LinkJournal = mkDefault "guest";
        })
      ];

      filesConfig = mkMerge [
        { PrivateUsersChown = mkDefault "yes"; }
        # Shared store: bind the host's /nix/store read-only (plus the Nix
        # database when the daemon socket is mounted as well).
        (mkIf instance.sharedNix {
          BindReadOnly = [ "/nix/store" ]
            ++ optional instance.mountDaemonSocket "/nix/var/nix/db";
        })
        (mkIf sharesDaemonSocket {
          Bind = [ "/nix/var/nix/daemon-socket" ];
        })
      ];

      networkConfig = mkMerge [
        (mkIf hasPrivateNetwork {
          Private = true;
          VirtualEthernet = "yes";
        })
        (mkIf (instance.zone != null) {
          Zone = instance.zone;
        })
        (mkIf (instance.forwardPorts != [ ]) {
          Port = instance.forwardPorts;
        })
      ];
    }

    # Without a shared store, generate a `[Files]` section containing one
    # `BindReadOnly=` line per store path in the closure of the toplevel.
    (mkIf (!instance.sharedNix) {
      extraDrvConfig =
        let
          closure = pkgs.closureInfo { rootPaths = [ toplevel ]; };
        in
        pkgs.runCommand "bindmounts.nspawn" { }
          ''
            echo "[Files]" > $out
            cat ${closure}/store-paths | while read line
            do
            echo "BindReadOnly=$line" >> $out
            done
          '';
    })
  ];

# One `{ container, config }` record (see `mkImage`) per declared instance, keyed by instance name.
images = mapAttrs mkImage cfg;
in {
# Option declarations: networking zones and declarative container instances.
options.nixos.containers =
  let
    # Each zone takes the networking options generated for the "zone" kind.
    zoneModule = types.submodule {
      options = mkNetworkingOpts "zone";
    };

    # Each instance takes the options declared in `container-options.nix`
    # (imported with `declarative = true`).
    instanceModule = types.submodule ({ ... }: {
      options = import ./container-options.nix { inherit pkgs lib; declarative = true; };
    });
  in
  {
    zones = mkOption {
      default = {};
      type = types.attrsOf zoneModule;
      description = lib.mdDoc ''
        Networking zones for nspawn containers. In this mode, the host-side
        of the virtual ethernet of a machine is managed by an interface named
        `vz-<name>`.
      '';
    };

    instances = mkOption {
      default = {};
      type = types.attrsOf instanceModule;
      description = lib.mdDoc ''
        Attribute set to define {manpage}`systemd.nspawn(5)`-managed containers. With this attribute-set,
        a network, a shared store and a NixOS configuration can be declared for each running
        container.
        The container's state is managed in `/var/lib/machines/<name>`.
        A machine can be started with the
        `systemd-nspawn@<name>.service`-unit, during runtime it can
        be accessed with {manpage}`machinectl(1)`.
        Please note that if both [](#opt-nixos.containers.instances._name_.network)
        & [](#opt-nixos.containers.instances._name_.zone) are
        `null`, the container will use the host's network.
      '';
    };
  };

config = mkIf (cfg != {}) {
# Sanity checks: two host-level assertions, followed by four assertions per
# declared container instance (`n` is the instance name, `inst` its options).
assertions = [
  {
    assertion = !config.boot.isContainer;
    message = ''
      Cannot start containers inside a container!
    '';
  }
  {
    assertion = config.networking.useNetworkd;
    message = "Only networkd is supported!";
  }
] ++ foldlAttrs (acc: n: inst:
  let
    # Static host-side addresses of one address family. A `null` value is
    # treated as "none", matching the guard used when the networkd address
    # list for veth interfaces is assembled.
    staticHostAddrs = family:
      optionals (family.static.hostAddresses != null) family.static.hostAddresses;
  in
  acc ++ [
    # A referenced zone must be declared.
    {
      assertion = inst.zone != null -> (config.nixos.containers.zones != null && config.nixos.containers.zones?${inst.zone});
      message = ''
        No configuration found for zone `${inst.zone}'!
        (Invalid container: ${n})
      '';
    }
    # A container in a zone must not bring its own address pools.
    {
      assertion = inst.zone != null -> dynamicAddrsDisabled inst;
      message = ''
        Cannot assign additional generic address-pool to a veth-pair if corresponding
        container `${n}' already uses zone `${inst.zone}'!
      '';
    }
    # The "reload" and "dynamic" activation strategies require `sharedNix`.
    {
      assertion = !inst.sharedNix -> ! (elem inst.activation.strategy [ "reload" "dynamic" ]);
      message = ''
        Cannot reload a container with `sharedNix' disabled! As soon as the
        `BindReadOnly='-options change, a config activation can't be done without a reboot
        (affected: ${n})!
      '';
    }
    # A container in a zone must not define static host-side addresses.
    {
      assertion = (inst.zone != null && inst.network != null) -> (staticHostAddrs inst.network.v4 ++ staticHostAddrs inst.network.v6) == [];
      message = ''
        Container ${n} is in zone ${inst.zone}, but also attempts to define
        its own host-side addresses. Use the host-side addresses of the zone instead.
      '';
    }
  ]) [ ] cfg;

# Hand the `radvd` definition from the let-block to `services.radvd`.
services.radvd = radvd;

systemd = {
# networkd units for the host side: one "20-ve-<name>" network per container
# with its own veth pair and one "20-vz-<name>" network per zone.
network.networks =
  let
    # Instances with a network section that are not attached to a zone.
    standalone = filterAttrs
      (_: inst: inst.network != null && inst.zone == null)
      cfg;

    vethNetworks = mapAttrs'
      (name: inst:
        let
          net = inst.network;
          # Static host addresses of one family; `null` counts as none.
          staticHostAddrs = family:
            optionals (family.static.hostAddresses != null)
              family.static.hostAddresses;
        in
        nameValuePair "20-${ifacePrefix "veth"}-${name}" {
          matchConfig = mkMatchCfg "veth" name;
          address = net.v4.addrPool
            ++ net.v6.addrPool
            ++ staticHostAddrs net.v4
            ++ staticHostAddrs net.v6;
          # DHCPServer is set to "yes" only when an IPv4 pool is configured.
          networkConfig = mkNetworkCfg (net.v4.addrPool != []) {
            v4Nat = net.v4.nat;
            v6Nat = net.v6.nat;
          };
        })
      standalone;

    zoneNetworks = mapAttrs'
      (name: zone:
        nameValuePair "20-${ifacePrefix "zone"}-${name}" {
          matchConfig = mkMatchCfg "zone" name;
          address = zone.v4.addrPool
            ++ zone.v6.addrPool
            ++ zone.hostAddresses;
          networkConfig = mkNetworkCfg true {
            v4Nat = zone.v4.nat;
            v6Nat = zone.v6.nat;
          };
        })
      config.nixos.containers.zones;
  in
  # On a name clash the zone entry wins, as the right-hand side of `//`.
  vethNetworks // zoneNetworks;

# One nspawn definition per image; the attribute name is not passed on.
nspawn = mapAttrs (_name: image: mkContainer image) images;

# machines.target wants the nspawn service of every declared instance.
targets.machines.wants =
  map (machine: "systemd-nspawn@${machine}.service") (attrNames cfg);
# One `systemd-nspawn@<name>` service definition per declared instance.
services = mapAttrs'
  (machine: { activation, timeoutStartSec, credentials, ... }:
    let
      # Whether the activation strategy calls for an `ExecReload` command.
      reloadable = elem activation.strategy [ "reload" "dynamic" ];

      # Fallback reload script: look up the container's leader PID via
      # machinectl, enter its namespaces with nsenter and run
      # `switch-to-configuration test` from the instance's toplevel.
      defaultReloadScript = pkgs.writeShellScript "activate" ''
        pid=$(machinectl show ${machine} --value --property Leader)
        ${pkgs.util-linux}/bin/nsenter -t "$pid" -m -u -U -i -n -p \
        -- ${images.${machine}.container.config.system.build.toplevel}/bin/switch-to-configuration test
      '';

      # A user-supplied reload script takes precedence over the fallback.
      reloadCommand =
        if activation.reloadScript == null
        then "${defaultReloadScript}"
        else "${activation.reloadScript}";
    in
    nameValuePair "systemd-nspawn@${machine}" {
      # Create a minimal directory skeleton on first start.
      preStart = mkBefore ''
        if [ ! -d /var/lib/machines/${machine} ]; then
        mkdir -p /var/lib/machines/${machine}/{etc,var,nix/var/nix}
        touch /var/lib/machines/${machine}/etc/{os-release,machine-id}
        fi
      '';

      partOf = [ "machines.target" ];
      before = [ "machines.target" ];

      serviceConfig = mkMerge [
        {
          TimeoutStartSec = timeoutStartSec;
          # Inherit settings from `systemd-nspawn@.service`.
          # Workaround since settings from `systemd-nspawn@.service` are not
          # picked up if an override exists and `systemd-nspawn@ldap` exists.
          RestartForceExitStatus = 133;
          Type = "notify";
          TasksMax = 16384;
          WatchdogSec = "3min";
          SuccessExitStatus = 133;
          Delegate = "yes";
          KillMode = "mixed";
          Slice = "machine.slice";
          DevicePolicy = "closed";
          DeviceAllow = [
            "/dev/net/tun rwm"
            "char-pts rw"
            "/dev/loop-control rw"
            "block-loop rw"
            "block-blkext rw"
            "/dev/mapper/control rw"
            "block-device-mapper rw"
          ];
          X-ActivationStrategy = activation.strategy;
          # The leading empty entry precedes the actual command line.
          ExecStart = [
            ""
            "${config.systemd.package}/bin/systemd-nspawn ${credentials} --quiet --keep-unit --boot --network-veth --settings=override --machine=%i"
          ];
        }
        (mkIf reloadable {
          ExecReload = reloadCommand;
        })
      ];
    })
  cfg;
};
};
}
Loading

0 comments on commit b69c75c

Please sign in to comment.