Remove leftover autoremove containers during refresh
During system shutdown, Podman should go down gracefully, meaning
that we have time to spawn cleanup processes which remove any
containers set to autoremove. Unfortunately, this isn't always
the case. If we get a SIGKILL because the system is going down
immediately, we can't recover from this, and the autoremove
containers are not removed.

However, we can pick up any leftover autoremove containers when
we refresh the DB state, which is the first thing Podman does
after a reboot. By detecting any autoremove containers that have
actually run (a container that was created but never run doesn't
need to be removed) at that point and removing them, we keep the
fresh boot clean, even if Podman was terminated abnormally. (A
condensed sketch of this check follows the commit message.)

Fixes #21482

[NO NEW TESTS NEEDED] This requires a reboot to realistically
test.

Signed-off-by: Matt Heon <[email protected]>
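
A condensed, self-contained Go sketch of the check described in the
message above. Every type and function name in it is an illustrative
stand-in, not libpod's real API; the actual change to libpod/runtime.go
follows in the diff below.

    // Condensed sketch of the refresh-time cleanup added by this commit.
    // Stand-in types: libpod's real Container, states, and removal API differ.
    package main

    import "fmt"

    type ContainerState int

    const (
        StateCreated ContainerState = iota // created but never started: nothing to clean up
        StateExited                        // ran and stopped: autoremove should have fired
    )

    type Container struct {
        ID         string
        AutoRemove bool // container was created with --rm
        State      ContainerState
    }

    // cleanupLeftovers removes containers that should have autoremoved before
    // the reboot but did not (e.g. because Podman was SIGKILLed on shutdown).
    func cleanupLeftovers(ctrs []Container, remove func(Container) error) {
        for _, c := range ctrs {
            if c.AutoRemove && c.State == StateExited {
                if err := remove(c); err != nil {
                    fmt.Printf("Unable to remove container %s which should have autoremoved: %v\n", c.ID, err)
                }
            }
        }
    }

    func main() {
        ctrs := []Container{
            {ID: "aaa", AutoRemove: true, State: StateExited},  // leftover: removed
            {ID: "bbb", AutoRemove: true, State: StateCreated}, // never ran: kept
            {ID: "ccc", AutoRemove: false, State: StateExited}, // not --rm: kept
        }
        cleanupLeftovers(ctrs, func(c Container) error {
            fmt.Println("removing leftover autoremove container", c.ID)
            return nil
        })
    }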
mheon committed Feb 6, 2024
1 parent 22b1650 commit 9983e87
Showing 1 changed file with 30 additions and 11 deletions.
libpod/runtime.go (30 additions, 11 deletions)
@@ -166,7 +166,7 @@ func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error)
     if err != nil {
         return nil, err
     }
-    return newRuntimeFromConfig(conf, options...)
+    return newRuntimeFromConfig(ctx, conf, options...)
 }

 // NewRuntimeFromConfig creates a new container runtime using the given
@@ -175,10 +175,10 @@ func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error)
 // An error will be returned if the configuration file at the given path does
 // not exist or cannot be loaded
 func NewRuntimeFromConfig(ctx context.Context, userConfig *config.Config, options ...RuntimeOption) (*Runtime, error) {
-    return newRuntimeFromConfig(userConfig, options...)
+    return newRuntimeFromConfig(ctx, userConfig, options...)
 }

-func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
+func newRuntimeFromConfig(ctx context.Context, conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
     runtime := new(Runtime)

     if conf.Engine.OCIRuntime == "" {
@@ -223,7 +223,7 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti
         return nil, fmt.Errorf("starting shutdown signal handler: %w", err)
     }

-    if err := makeRuntime(runtime); err != nil {
+    if err := makeRuntime(ctx, runtime); err != nil {
         return nil, err
     }

@@ -333,7 +333,7 @@ func getDBState(runtime *Runtime) (State, error) {

 // Make a new runtime based on the given configuration
 // Sets up containers/storage, state store, OCI runtime
-func makeRuntime(runtime *Runtime) (retErr error) {
+func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) {
     // Find a working conmon binary
     cPath, err := runtime.config.FindConmon()
     if err != nil {
@@ -629,6 +629,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
         return err
     }

+    // Mark the runtime as valid - ready to be used, cannot be modified
+    // further.
+    // Need to do this *before* refresh as we can remove containers there.
+    // Should not be a big deal as we don't return it to users until after
+    // refresh runs.
+    runtime.valid = true
+
     // If we need to refresh the state, do it now - things are guaranteed to
     // be set up by now.
     if doRefresh {
@@ … @@
             }
         }

-        if err2 := runtime.refresh(runtimeAliveFile); err2 != nil {
+        if err2 := runtime.refresh(ctx, runtimeAliveFile); err2 != nil {
             return err2
         }
     }

     runtime.startWorker()

-    // Mark the runtime as valid - ready to be used, cannot be modified
-    // further
-    runtime.valid = true
-
     return nil
 }
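
The two hunks above relocate runtime.valid: it is now set before refresh
runs, because refresh can now remove containers, and libpod's removal
path refuses to operate on a runtime that has not been marked valid (the
sentinel for this in libpod is define.ErrRuntimeStopped). A hypothetical,
self-contained sketch of that guard shape, with stand-in names:

    // Hypothetical illustration of why runtime.valid must be set before
    // refresh: removal goes through a path that rejects an invalid runtime.
    package main

    import (
        "errors"
        "fmt"
    )

    var errRuntimeStopped = errors.New("runtime has already been stopped")

    type Runtime struct{ valid bool }

    func (r *Runtime) removeContainer(id string) error {
        if !r.valid {
            // Before this commit, refresh ran while valid was still false,
            // so any removal attempted during refresh would fail here.
            return errRuntimeStopped
        }
        fmt.Println("removed", id)
        return nil
    }

    func main() {
        r := &Runtime{}
        fmt.Println(r.removeContainer("leftover")) // error: runtime not yet valid
        r.valid = true                             // the commit moves this earlier
        fmt.Println(r.removeContainer("leftover")) // succeeds
    }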

@@ -819,7 +822,7 @@ func (r *Runtime) Shutdown(force bool) error {
 // Reconfigures the runtime after a reboot
 // Refreshes the state, recreating temporary files
 // Does not check validity as the runtime is not valid until after this has run
-func (r *Runtime) refresh(alivePath string) error {
+func (r *Runtime) refresh(ctx context.Context, alivePath string) error {
     logrus.Debugf("Podman detected system restart - performing state refresh")

     // Clear state of database if not running in container
@@ -856,6 +859,22 @@ func (r *Runtime) refresh(alivePath string) error {
         if err := ctr.refresh(); err != nil {
             logrus.Errorf("Refreshing container %s: %v", ctr.ID(), err)
         }
+        // This is the only place it's safe to use ctr.state.State unlocked
+        // We're holding the alive lock, guaranteed to be the only Libpod on the system right now.
+        if ctr.AutoRemove() && ctr.state.State == define.ContainerStateExited {
+            opts := ctrRmOpts{
+                // Don't force-remove, we're supposed to be fresh off a reboot
+                // If we have to force something is seriously wrong
+                Force: false,
+                RemoveVolume: true,
+            }
+            // This container should have autoremoved before the
+            // reboot but did not.
+            // Get rid of it.
+            if _, _, err := r.removeContainer(ctx, ctr, opts); err != nil {
+                logrus.Errorf("Unable to remove container %s which should have autoremoved: %v", ctr.ID(), err)
+            }
+        }
     }
     for _, pod := range pods {
         if err := pod.refresh(); err != nil {
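
The "alive lock" mentioned in the new comment is a file lock Podman holds
around refresh (libpod builds it from an alive.lck file via its lockfile
package), and holding it is what makes the unlocked ctr.state.State read
safe: no other libpod process can be running. A rough sketch of the
general pattern, assuming Linux flock semantics; this is not libpod's
actual locking code:

    // Rough sketch of an "alive lock": an exclusive file lock whose holder
    // knows it is the only runtime instance on the machine. Assumes Unix;
    // libpod's real implementation lives in its own lockfile package.
    package main

    import (
        "fmt"
        "os"

        "golang.org/x/sys/unix"
    )

    func main() {
        f, err := os.OpenFile("/tmp/alive.lck", os.O_CREATE|os.O_RDWR, 0o600)
        if err != nil {
            panic(err)
        }
        defer f.Close()

        // Block until we hold the lock exclusively. While it is held, reads
        // that are normally racy (like ctr.state.State during refresh) are safe.
        if err := unix.Flock(int(f.Fd()), unix.LOCK_EX); err != nil {
            panic(err)
        }
        defer unix.Flock(int(f.Fd()), unix.LOCK_UN)

        fmt.Println("exclusive: safe to refresh state and remove leftover containers")
    }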
