Skip to content

Commit

Permalink
feat: support lvm auto activation
Browse files Browse the repository at this point in the history
Support lvm auto-activation as per
https://man7.org/linux/man-pages/man7/lvmautoactivation.7.html.

This changes Talos's behavior: instead of unconditionally trying to
activate all volume groups, activation is now driven by udev events.

Fixes: siderolabs#9300

Signed-off-by: Noel Georgi <[email protected]>
  • Loading branch information
frezbo committed Sep 18, 2024
1 parent 7bd26df commit d4042ec
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 7 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-09-12T16:43:46Z by kres 8be5fa7.
# Generated on 2024-09-18T10:36:36Z by kres 8be5fa7.

name: default
concurrency:
Expand Down Expand Up @@ -2771,7 +2771,7 @@ jobs:
- name: e2e-qemu
env:
IMAGE_REGISTRY: registry.dev.siderolabs.io
QEMU_EXTRA_DISKS: "2"
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/integration-qemu-cron.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-09-09T13:58:35Z by kres 8be5fa7.
# Generated on 2024-09-18T10:36:36Z by kres 8be5fa7.

name: integration-qemu-cron
concurrency:
Expand Down Expand Up @@ -81,7 +81,7 @@ jobs:
- name: e2e-qemu
env:
IMAGE_REGISTRY: registry.dev.siderolabs.io
QEMU_EXTRA_DISKS: "2"
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_DRIVERS: ide,nvme
QEMU_EXTRA_DISKS_SIZE: "10240"
WITH_CONFIG_PATCH_WORKER: '@hack/test/patches/ephemeral-nvme.yaml'
Expand Down
2 changes: 1 addition & 1 deletion .kres.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ spec:
withSudo: true
environment:
IMAGE_REGISTRY: registry.dev.siderolabs.io
QEMU_EXTRA_DISKS: "2"
QEMU_EXTRA_DISKS: "3"
QEMU_EXTRA_DISKS_SIZE: "10240"
QEMU_EXTRA_DISKS_DRIVERS: "ide,nvme"
WITH_CONFIG_PATCH_WORKER: "@hack/test/patches/ephemeral-nvme.yaml"
Expand Down
132 changes: 132 additions & 0 deletions internal/app/machined/pkg/controllers/block/lvm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package block

import (
"context"
"fmt"
"strings"

"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/gen/optional"
"github.com/siderolabs/go-cmd/pkg/cmd"
"go.uber.org/zap"

"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/block"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
"github.com/siderolabs/talos/pkg/machinery/resources/v1alpha1"
)

// LVMActivationController activates LVM volume groups once their physical
// volumes are reported by the block.DiscoveryController.
type LVMActivationController struct{}

// Name implements controller.Controller interface.
func (ctrl *LVMActivationController) Name() string {
	return "block.LVMActivationController"
}

// Inputs implements controller.Controller interface.
//
// The controller watches the EPHEMERAL mount status (activation is deferred
// until it exists) and all discovered volumes (to spot LVM2 physical volumes).
func (ctrl *LVMActivationController) Inputs() []controller.Input {
	ephemeralMountStatus := controller.Input{
		Namespace: v1alpha1.NamespaceName,
		Type:      runtimeres.MountStatusType,
		ID:        optional.Some(constants.EphemeralPartitionLabel),
		Kind:      controller.InputWeak,
	}

	discoveredVolumes := controller.Input{
		Namespace: block.NamespaceName,
		Type:      block.DiscoveredVolumeType,
		Kind:      controller.InputWeak,
	}

	return []controller.Input{ephemeralMountStatus, discoveredVolumes}
}

// Outputs implements controller.Controller interface.
//
// No resources are produced: the controller acts purely via side effects
// (invoking the lvm binary to activate volume groups).
func (ctrl *LVMActivationController) Outputs() []controller.Output {
	return nil
}

// Run implements controller.Controller interface.
//
// Run watches discovered volumes for LVM2 physical volumes and activates the
// corresponding volume groups once all of their physical volumes are present,
// following the event-based LVM auto-activation scheme described in
// https://man7.org/linux/man-pages/man7/lvmautoactivation.7.html.
//
//nolint:gocyclo
func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
	// volume groups already activated by this controller instance; avoids
	// re-running vgchange on every reconcile event
	touchedIDs := make(map[string]struct{})

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.EventCh():
			// do not attempt activation until the EPHEMERAL partition is mounted
			if _, err := safe.ReaderGetByID[*runtimeres.MountStatus](ctx, r, constants.EphemeralPartitionLabel); err != nil {
				if state.IsNotFoundError(err) {
					// wait for the mount status to be available
					continue
				}

				return fmt.Errorf("failed to get mount status: %w", err)
			}
		}

		discoveredVolumes, err := safe.ReaderListAll[*block.DiscoveredVolume](ctx, r)
		if err != nil && !state.IsNotFoundError(err) {
			return fmt.Errorf("failed to list discovered volumes: %w", err)
		}

		for iterator := discoveredVolumes.Iterator(); iterator.Next(); {
			// only LVM2 physical volumes are of interest
			if iterator.Value().TypedSpec().Name != "lvm2-pv" {
				continue
			}

			// first we check if all associated volumes are available
			// https://man7.org/linux/man-pages/man7/lvmautoactivation.7.html
			stdOut, err := cmd.RunContext(ctx,
				"/sbin/lvm",
				"pvscan",
				"--cache",
				"--verbose",
				"--listvg",
				"--checkcomplete",
				"--vgonline",
				"--autoactivation",
				"event",
				"--udevoutput",
				iterator.Value().TypedSpec().DevPath,
			)
			if err != nil {
				return fmt.Errorf("failed to check LVM volume availability: %w", err)
			}

			// with --udevoutput the scan prints `LVM_VG_NAME_COMPLETE='<vg>'`
			// when the volume group is complete; anything else means not all
			// physical volumes have shown up yet, so skip this device for now
			if stdOut == "" || !strings.HasPrefix(stdOut, "LVM_VG_NAME_COMPLETE") {
				continue
			}

			// extract the VG name from `LVM_VG_NAME_COMPLETE='<vg>'\n`
			vgName := strings.TrimSuffix(strings.TrimPrefix(strings.TrimSuffix(stdOut, "\n"), "LVM_VG_NAME_COMPLETE='"), "'")

			if _, ok := touchedIDs[vgName]; ok {
				// this volume group was already activated
				continue
			}

			logger.Info("activating LVM volume", zap.String("name", vgName))

			// activate the volume group
			if _, err = cmd.RunContext(ctx,
				"/sbin/lvm",
				"vgchange",
				"-aay",
				"--autoactivation",
				"event",
				vgName,
			); err != nil {
				return fmt.Errorf("failed to activate LVM volume %s: %w", vgName, err)
			}

			touchedIDs[vgName] = struct{}{}
		}
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
},
&block.DiscoveryController{},
&block.DisksController{},
&block.LVMActivationController{},
&block.SystemDiskController{},
&block.UserDiskConfigController{},
&block.VolumeConfigController{},
Expand Down
2 changes: 1 addition & 1 deletion internal/integration/api/extensions_qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import (
"github.com/siderolabs/talos/pkg/machinery/resources/network"
)

// ExtensionsSuiteQEMU verifies Talos is securebooted.
// ExtensionsSuiteQEMU verifies Talos extensions on QEMU.
type ExtensionsSuiteQEMU struct {
base.K8sSuite

Expand Down
109 changes: 108 additions & 1 deletion internal/integration/api/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,25 @@ package api

import (
"context"
"fmt"
"strings"
"testing"
"time"

"github.com/cosi-project/runtime/pkg/safe"
"github.com/siderolabs/go-pointer"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/siderolabs/talos/internal/integration/base"
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/config/machine"
"github.com/siderolabs/talos/pkg/machinery/resources/block"
)

// VolumesSuite ...
type VolumesSuite struct {
base.APISuite
base.K8sSuite

ctx context.Context //nolint:containedctx
ctxCancel context.CancelFunc
Expand Down Expand Up @@ -175,6 +182,106 @@ func (suite *VolumesSuite) TestDisks() {
}
}

// TestLVMActivation verifies that LVM volume group is activated after reboot.
//
// The test creates a volume group spanning two user disks (via a privileged
// pod running LVM tools in the host namespaces), reboots the node, and then
// asserts that the physical volumes are discovered again — i.e. that the
// event-based auto-activation re-activated the volume group.
func (suite *VolumesSuite) TestLVMActivation() {
	if testing.Short() {
		suite.T().Skip("skipping LVM activation test in short mode")
	}

	if suite.ExtensionsQEMU {
		// suite.UserDisks is also used by the extensions tests; skip to avoid edge cases
		suite.T().Skip("skipping LVM activation test when extensions test are enabled")
	}

	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)

	// only create the volume group if a previous run has not left one behind
	if !suite.lvmVolumeExists() {
		userDisks, err := suite.UserDisks(suite.ctx, node, 10)
		suite.Require().NoError(err)

		suite.Require().GreaterOrEqual(len(userDisks), 2, "expected at least two user disks with size greater than 10GB to be available")

		// spawn a privileged pod with host PID/network so we can nsenter into
		// the host mount namespace and run the LVM tools there
		_, err = suite.Clientset.CoreV1().Pods("kube-system").Create(suite.ctx, &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: "pv-create",
			},
			Spec: corev1.PodSpec{
				Containers: []corev1.Container{
					{
						Name:  "pv-create",
						Image: "alpine",
						Command: []string{
							"tail",
							"-f",
							"/dev/null",
						},
						SecurityContext: &corev1.SecurityContext{
							Privileged: pointer.To(true),
						},
					},
				},
				HostNetwork: true,
				HostPID:     true,
			},
		}, metav1.CreateOptions{})
		defer suite.Clientset.CoreV1().Pods("kube-system").Delete(suite.ctx, "pv-create", metav1.DeleteOptions{}) //nolint:errcheck

		suite.Require().NoError(err)

		// wait for the pod to be ready
		suite.Require().NoError(suite.WaitForPodToBeRunning(suite.ctx, 5*time.Minute, "kube-system", "pv-create"))

		// create the volume group from the first two user disks
		stdout, _, err := suite.ExecuteCommandInPod(
			suite.ctx,
			"kube-system",
			"pv-create",
			fmt.Sprintf("nsenter --mount=/proc/1/ns/mnt -- vgcreate lvmtest %s", strings.Join(userDisks[:2], " ")),
		)
		suite.Require().NoError(err)

		suite.Require().Contains(stdout, "Volume group \"lvmtest\" successfully created")

		// create a logical volume so the group has actual content to activate
		stdout, _, err = suite.ExecuteCommandInPod(
			suite.ctx,
			"kube-system",
			"pv-create",
			"nsenter --mount=/proc/1/ns/mnt -- lvcreate -n test -L 5G lvmtest",
		)
		suite.Require().NoError(err)

		suite.Require().Contains(stdout, "Logical volume \"test\" created.")
	}

	// now we want to reboot the node and make sure the array is still mounted
	suite.AssertRebooted(
		suite.ctx, node, func(nodeCtx context.Context) error {
			return base.IgnoreGRPCUnavailable(suite.Client.Reboot(nodeCtx))
		}, 5*time.Minute,
	)

	suite.Require().True(suite.lvmVolumeExists(), "LVM volume group was not activated after reboot")
}

// lvmVolumeExists reports whether both LVM2 physical volumes backing the test
// volume group have been discovered on a random worker node.
func (suite *VolumesSuite) lvmVolumeExists() bool {
	node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)

	nodeCtx := client.WithNode(suite.ctx, node)

	discovered, err := safe.StateListAll[*block.DiscoveredVolume](nodeCtx, suite.Client.COSI)
	suite.Require().NoError(err)

	found := 0

	for it := discovered.Iterator(); it.Next(); {
		if it.Value().TypedSpec().Name == "lvm2-pv" {
			found++
		}
	}

	// we test with creating a volume group with two block devices
	return found == 2
}

// init registers the suite with the integration test runner.
func init() {
	allSuites = append(allSuites, &VolumesSuite{})
}

0 comments on commit d4042ec

Please sign in to comment.