From 76881a295eb81d774cc764ce33291b239d0ea731 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Thu, 23 Dec 2021 13:19:57 +0200 Subject: [PATCH 1/2] Fix Range() iteration The function passed into the Range function of sync.Map will stop the iteration if false is returned. This commit makes sure we iterate through all elements in the map. Signed-off-by: Gabriel Adrian Samfira --- cmd/containerd-shim-runhcs-v1/pod.go | 5 +++-- cmd/containerd-shim-runhcs-v1/task_hcs.go | 20 ++++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index 6ed3c35b8b..a4713f2892 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -380,8 +380,9 @@ func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all return wt.KillExec(ctx, eid, signal, all) }) - // iterate all - return false + // Iterate all. Returning false stops the iteration. See: + // https://pkg.go.dev/sync#Map.Range + return true }) } eg.Go(func() error { diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 0f48c0f6a6..eb7157509e 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -536,8 +536,9 @@ func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all }).Warn("failed to kill exec in task") } - // iterate all - return false + // Iterate all. Returning false stops the iteration. See: + // https://pkg.go.dev/sync#Map.Range + return true }) } if signal == 0x9 && eid == "" && ht.host != nil { @@ -578,8 +579,9 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim ex.ForceExit(ctx, 1) } - // iterate next - return false + // Iterate all. Returning false stops the iteration. 
See: + // https://pkg.go.dev/sync#Map.Range + return true }) } switch state := e.State(); state { @@ -617,8 +619,9 @@ func (ht *hcsTask) Pids(ctx context.Context) ([]runhcsopts.ProcessDetails, error ex := value.(shimExec) pidMap[ex.Pid()] = ex.ID() - // Iterate all - return false + // Iterate all. Returning false stops the iteration. See: + // https://pkg.go.dev/sync#Map.Range + return true }) pidMap[ht.init.Pid()] = ht.init.ID() @@ -699,8 +702,9 @@ func (ht *hcsTask) waitForHostExit() { ex := value.(shimExec) ex.ForceExit(ctx, 1) - // iterate all - return false + // Iterate all. Returning false stops the iteration. See: + // https://pkg.go.dev/sync#Map.Range + return true }) ht.init.ForceExit(ctx, 1) ht.closeHost(ctx) From a6edb2596b408a6476e8a0b5b1b1b830423b4a55 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Mon, 20 Dec 2021 17:57:58 +0200 Subject: [PATCH 2/2] Wait for waitInitExit() to return This change gives waitInitExit() a chance to cleanup resource when DeleteExec() is called, before returning. This should fix situations where the shim exits before releasing container resources. Signed-off-by: Gabriel Adrian Samfira --- cmd/containerd-shim-runhcs-v1/task_hcs.go | 35 +++++++++++++++++++ .../task_hcs_test.go | 12 +++++++ 2 files changed, 47 insertions(+) diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index eb7157509e..1184e7cba1 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -590,6 +590,41 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim case shimExecStateRunning: return 0, 0, time.Time{}, newExecInvalidStateError(ht.id, eid, state, "delete") } + + if eid == "" { + // We are killing the init task, so we expect the container to be + // stopped after this. 
+ // + // The task process may have already exited, and the status set to + // shimExecStateExited, but resources may still be in the process + // of being cleaned up. Wait for ht.closed to be closed. This signals + // that waitInitExit() has finished destroying container resources, + // and layers were unmounted. + // If the shim exits before resources are cleaned up, those resources + // will remain locked and untracked, which leads to lingering sandboxes + // and container resources like base vhdx. + select { + case <-time.After(30 * time.Second): + log.G(ctx).Error("timed out waiting for resource cleanup") + return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup") + case <-ht.closed: + } + + // The init task has now exited. A ForceExit() has already been sent to + // execs. Clean up execs and continue. + ht.execs.Range(func(key, value interface{}) bool { + if key == "" { + // Iterate next. + return true + } + ht.execs.Delete(key) + + // Iterate all. Returning false stops the iteration. See: + // https://pkg.go.dev/sync#Map.Range + return true + }) + } + status := e.Status() if eid != "" { ht.execs.Delete(eid) diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs_test.go b/cmd/containerd-shim-runhcs-v1/task_hcs_test.go index 798af38556..cbeeaabda7 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs_test.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs_test.go @@ -161,6 +161,8 @@ func Test_hcsTask_DeleteExec_InitExecID_CreatedState_Success(t *testing.T) { // remove the 2nd exec so we just check without it.
lt.execs.Delete(second.id) + // Simulate waitInitExit() closing the host + close(lt.closed) // try to delete the init exec pid, status, at, err := lt.DeleteExec(context.TODO(), "") @@ -178,6 +180,8 @@ func Test_hcsTask_DeleteExec_InitExecID_RunningState_Error(t *testing.T) { // Start the init exec _ = init.Start(context.TODO()) + // Simulate waitInitExit() closing the host + close(lt.closed) // try to delete the init exec pid, status, at, err := lt.DeleteExec(context.TODO(), "") @@ -192,6 +196,8 @@ func Test_hcsTask_DeleteExec_InitExecID_ExitedState_Success(t *testing.T) { _ = init.Kill(context.TODO(), 0xf) + // Simulate waitInitExit() closing the host + close(lt.closed) // try to delete the init exec pid, status, at, err := lt.DeleteExec(context.TODO(), "") @@ -207,6 +213,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_CreatedState_Error(t *testing.T) // start the init exec (required to have 2nd exec) _ = init.Start(context.TODO()) + // Simulate waitInitExit() closing the host + close(lt.closed) // try to delete the init exec pid, status, at, err := lt.DeleteExec(context.TODO(), "") @@ -226,6 +234,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_RunningState_Error(t *testing.T) // put the 2nd exec into the running state _ = second.Start(context.TODO()) + // Simulate waitInitExit() closing the host + close(lt.closed) // try to delete the init exec pid, status, at, err := lt.DeleteExec(context.TODO(), "") @@ -244,6 +254,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_ExitedState_Success(t *testing.T // put the 2nd exec into the exited state _ = second.Kill(context.TODO(), 0xf) + // Simulate waitInitExit() closing the host + close(lt.closed) // try to delete the init exec pid, status, at, err := lt.DeleteExec(context.TODO(), "")