Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wait for waitInitExit() to return #1249

Merged
merged 2 commits into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmd/containerd-shim-runhcs-v1/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,9 @@ func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all
return wt.KillExec(ctx, eid, signal, all)
})

// iterate all
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}
eg.Go(func() error {
Expand Down
55 changes: 47 additions & 8 deletions cmd/containerd-shim-runhcs-v1/task_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -536,8 +536,9 @@ func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all
}).Warn("failed to kill exec in task")
}

// iterate all
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}
if signal == 0x9 && eid == "" && ht.host != nil {
Expand Down Expand Up @@ -578,8 +579,9 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
ex.ForceExit(ctx, 1)
}

// iterate next
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}
switch state := e.State(); state {
Expand All @@ -588,6 +590,41 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
case shimExecStateRunning:
return 0, 0, time.Time{}, newExecInvalidStateError(ht.id, eid, state, "delete")
}

if eid == "" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are cleaning up resources here when deleting the init task (since this triggers ht.close()), should we also delete everything in ht.execs here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. Let me dig a bit deeper and check whether the entire hcsTask object is already reaped somewhere after init dies and the init task gets deleted.

Copy link
Contributor Author

@gabriel-samfira gabriel-samfira Dec 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated the PR to remove the task object after successfully deleting the init exec:

https://github.com/microsoft/hcsshim/pull/1249/files#diff-3524c9e80b115f6d1dd47e517ed3861db8c2fb7c7857224b2fc5e11bbf50f722R232

// We are deleting the init task, so we expect the container to be
// stopped after this.
//
// The task process may have already exited, and the status set to
// shimExecStateExited, but resources may still be in the process
// of being cleaned up. Wait for ht.closed to be closed. This signals
// that waitInitExit() has finished destroying container resources,
// and layers were unmounted.
// If the shim exits before resources are cleaned up, those resources
// will remain locked and untracked, which leads to lingering sandboxes
// and container resources like base vhdx.
select {
case <-time.After(30 * time.Second):
log.G(ctx).Error("timed out waiting for resource cleanup")
return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup")
case <-ht.closed:
}

// The init task has now exited. A ForceExit() has already been sent to
// execs. Clean up execs and continue.
ht.execs.Range(func(key, value interface{}) bool {
if key == "" {
// Iterate next.
return true
}
ht.execs.Delete(key)

// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}

status := e.Status()
if eid != "" {
ht.execs.Delete(eid)
Expand Down Expand Up @@ -617,8 +654,9 @@ func (ht *hcsTask) Pids(ctx context.Context) ([]runhcsopts.ProcessDetails, error
ex := value.(shimExec)
pidMap[ex.Pid()] = ex.ID()

// Iterate all
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
pidMap[ht.init.Pid()] = ht.init.ID()

Expand Down Expand Up @@ -699,8 +737,9 @@ func (ht *hcsTask) waitForHostExit() {
ex := value.(shimExec)
ex.ForceExit(ctx, 1)

// iterate all
return false
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
ht.init.ForceExit(ctx, 1)
ht.closeHost(ctx)
Expand Down
12 changes: 12 additions & 0 deletions cmd/containerd-shim-runhcs-v1/task_hcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ func Test_hcsTask_DeleteExec_InitExecID_CreatedState_Success(t *testing.T) {
// remove the 2nd exec so we just check without it.
lt.execs.Delete(second.id)

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -178,6 +180,8 @@ func Test_hcsTask_DeleteExec_InitExecID_RunningState_Error(t *testing.T) {
// Start the init exec
_ = init.Start(context.TODO())

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -192,6 +196,8 @@ func Test_hcsTask_DeleteExec_InitExecID_ExitedState_Success(t *testing.T) {

_ = init.Kill(context.TODO(), 0xf)

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -207,6 +213,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_CreatedState_Error(t *testing.T)
// start the init exec (required to have 2nd exec)
_ = init.Start(context.TODO())

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -226,6 +234,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_RunningState_Error(t *testing.T)
// put the 2nd exec into the running state
_ = second.Start(context.TODO())

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand All @@ -244,6 +254,8 @@ func Test_hcsTask_DeleteExec_InitExecID_2ndExec_ExitedState_Success(t *testing.T
// put the 2nd exec into the exited state
_ = second.Kill(context.TODO(), 0xf)

// Simulate waitInitExit() closing the host
close(lt.closed)
// try to delete the init exec
pid, status, at, err := lt.DeleteExec(context.TODO(), "")

Expand Down