Skip to content

Commit

Permalink
Merge pull request #8221 from hakman/docker-healthcheck
Browse files Browse the repository at this point in the history
Move "docker-healthcheck" to DockerBuilder
  • Loading branch information
k8s-ci-robot authored Mar 15, 2020
2 parents 67def16 + 6a28d4f commit ba1d87e
Show file tree
Hide file tree
Showing 22 changed files with 832 additions and 83 deletions.
11 changes: 11 additions & 0 deletions docs/releases/1.18-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

* Terraform users on AWS may need to rename some resources in their state file in order to prepare for Terraform 0.12 support. See Required Actions below.

* The Docker `health-check` service is now disabled by default. It shouldn't be needed anymore, but it can still be enabled by setting `spec.docker.healthCheck: true`. It is recommended to also check [node-problem-detector](https://github.com/kubernetes/node-problem-detector) and [draino](https://github.com/planetlabs/draino) as replacements. See Required Actions below.

* Lyft CNI plugin default subnet tags changed from `Type: pod` to `KubernetesCluster: myclustername.mydns.io`. Subnets intended for use by the plugin will need to be tagged with this new tag and [additional tag filters](https://github.com/lyft/cni-ipvlan-vpc-k8s#other-configuration-flags) may need to be added to the cluster spec in order to achieve the desired set of subnets.

* Support for Kubernetes versions prior to 1.9 has been removed.
Expand Down Expand Up @@ -46,6 +48,15 @@
terraform apply
```

* Users that need the Docker `health-check` service will need to explicitly enable it:
```
kops edit cluster
# Add the following section
spec:
  docker:
    healthCheck: true
```

* Kubernetes 1.9 users will need to enable the PodPriority feature gate. This is required for newer versions of Kops.

To enable the Pod priority feature, follow these steps:
Expand Down
3 changes: 3 additions & 0 deletions k8s/crds/kops.k8s.io_clusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,9 @@ spec:
description: Experimental features permits enabling new features
such as dockerd metrics
type: boolean
healthCheck:
description: HealthCheck enables the periodic health-check service
type: boolean
hosts:
description: Hosts enables you to configure the endpoints the docker
daemon listens on i.e. tcp://0.0.0.0:2375 or unix:///var/run/docker.sock
Expand Down
8 changes: 8 additions & 0 deletions nodeup/pkg/model/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,14 @@ func (c *NodeupModelContext) IsKubernetesGTE(version string) bool {
return util.IsKubernetesGTE(version, c.kubernetesVersion)
}

// IsKubernetesLT reports whether the configured Kubernetes version is lower
// than version. It is defined as the negation of IsKubernetesGTE.
func (c *NodeupModelContext) IsKubernetesLT(version string) bool {
	// A zero major version is treated as a sign that Init was never called.
	if c.kubernetesVersion.Major == 0 {
		klog.Fatalf("kubernetesVersion not set (%s); Init not called", c.kubernetesVersion)
	}

	atLeast := c.IsKubernetesGTE(version)
	return !atLeast
}

// UseEtcdManager checks if the etcd cluster has etcd-manager enabled
func (c *NodeupModelContext) UseEtcdManager() bool {
for _, x := range c.Cluster.Spec.EtcdClusters {
Expand Down
73 changes: 73 additions & 0 deletions nodeup/pkg/model/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,13 @@ func (b *DockerBuilder) Build(c *fi.ModelBuilderContext) error {
return err
}

// Enable health-check
if b.healthCheck() || (b.IsKubernetesLT("1.18") && b.Distribution.IsDebianFamily()) {
c.AddTask(b.buildSystemdHealthCheckScript())
c.AddTask(b.buildSystemdHealthCheckService())
c.AddTask(b.buildSystemdHealthCheckTimer())
}

return nil
}

Expand Down Expand Up @@ -1147,6 +1154,60 @@ func (b *DockerBuilder) buildSystemdService(dockerVersionMajor int, dockerVersio
return service
}

// buildSystemdHealthCheckScript returns the file task that installs the
// docker-healthcheck script (resources.DockerHealthCheck) at
// /opt/kops/bin/docker-healthcheck with mode 0755.
func (b *DockerBuilder) buildSystemdHealthCheckScript() *nodetasks.File {
	return &nodetasks.File{
		Path:     "/opt/kops/bin/docker-healthcheck",
		Contents: fi.NewStringResource(resources.DockerHealthCheck),
		Type:     nodetasks.FileType_File,
		Mode:     s("0755"),
	}
}

// buildSystemdHealthCheckService returns the service task for
// docker-healthcheck.service, a oneshot unit whose ExecStart is the
// /opt/kops/bin/docker-healthcheck script.
func (b *DockerBuilder) buildSystemdHealthCheckService() *nodetasks.Service {
	const unitName = "docker-healthcheck.service"

	// Entries are applied in this exact order: {section, key, value}.
	entries := [][3]string{
		{"Unit", "Description", "Run docker-healthcheck once"},
		{"Unit", "Documentation", "https://kops.sigs.k8s.io"},
		{"Service", "Type", "oneshot"},
		{"Service", "ExecStart", "/opt/kops/bin/docker-healthcheck"},
		{"Install", "WantedBy", "multi-user.target"},
	}

	unit := &systemd.Manifest{}
	for _, e := range entries {
		unit.Set(e[0], e[1], e[2])
	}

	rendered := unit.Render()
	klog.V(8).Infof("Built service manifest %q\n%s", unitName, rendered)

	task := &nodetasks.Service{
		Name:       unitName,
		Definition: s(rendered),
	}
	// Fill in the remaining task fields with the nodetasks defaults.
	task.InitDefaults()

	return task
}

// buildSystemdHealthCheckTimer returns the task for docker-healthcheck.timer,
// a systemd timer that targets docker-healthcheck.service with
// OnUnitInactiveSec=10s. The timer is modelled as a nodetasks.Service.
func (b *DockerBuilder) buildSystemdHealthCheckTimer() *nodetasks.Service {
	const unitName = "docker-healthcheck.timer"

	// Entries are applied in this exact order: {section, key, value}.
	entries := [][3]string{
		{"Unit", "Description", "Trigger docker-healthcheck periodically"},
		{"Unit", "Documentation", "https://kops.sigs.k8s.io"},
		{"Timer", "OnUnitInactiveSec", "10s"},
		{"Timer", "Unit", "docker-healthcheck.service"},
		{"Install", "WantedBy", "multi-user.target"},
	}

	unit := &systemd.Manifest{}
	for _, e := range entries {
		unit.Set(e[0], e[1], e[2])
	}

	rendered := unit.Render()
	klog.V(8).Infof("Built timer manifest %q\n%s", unitName, rendered)

	task := &nodetasks.Service{
		Name:       unitName,
		Definition: s(rendered),
	}
	// Fill in the remaining task fields with the nodetasks defaults.
	task.InitDefaults()

	return task
}

// buildContainerOSConfigurationDropIn is responsible for configuring the docker daemon options
func (b *DockerBuilder) buildContainerOSConfigurationDropIn(c *fi.ModelBuilderContext) error {
lines := []string{
Expand Down Expand Up @@ -1260,3 +1321,15 @@ func (b *DockerBuilder) skipInstall() bool {

return d.SkipInstall
}

// healthCheck reports whether the cluster spec asks for the Docker
// health-check to be enabled.
func (b *DockerBuilder) healthCheck() bool {
	// Without a docker section in the spec, the health-check stays disabled.
	if cfg := b.Cluster.Spec.Docker; cfg != nil {
		return cfg.HealthCheck
	}

	return false
}
4 changes: 4 additions & 0 deletions nodeup/pkg/model/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ func TestDockerBuilder_SkipInstall(t *testing.T) {
runDockerBuilderTest(t, "skipinstall")
}

// TestDockerBuilder_HealthCheck runs the shared DockerBuilder test helper
// against the "healthcheck" case.
func TestDockerBuilder_HealthCheck(t *testing.T) {
	const testCase = "healthcheck"

	runDockerBuilderTest(t, testCase)
}

func TestDockerBuilder_BuildFlags(t *testing.T) {
logDriver := "json-file"
grid := []struct {
Expand Down
5 changes: 3 additions & 2 deletions nodeup/pkg/model/resources/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"containerd.go",
"docker.go",
"containerd_license.go",
"docker_healthcheck.go",
"docker_license.go",
],
importpath = "k8s.io/kops/nodeup/pkg/model/resources",
visibility = ["//visibility:public"],
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
#!/bin/bash
/*
Copyright 2019 The Kubernetes Authors.
# Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package resources

var DockerHealthCheck = `#!/bin/bash
# Copyright 2019 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -18,7 +36,7 @@
# of docker. If it detects a failure, it will restart docker using systemctl.
healthcheck() {
if output=`timeout 60 docker network ls`; then
if output=` + "`timeout 60 docker network ls`" + `; then
echo "$output" | fgrep -qw host || {
echo "docker 'host' network missing"
return 1
Expand Down Expand Up @@ -47,7 +65,7 @@ echo "docker still unresponsive; triggering docker restart"
systemctl stop docker
echo "wait all tcp sockets to close"
sleep `cat /proc/sys/net/ipv4/tcp_fin_timeout`
sleep ` + "`cat /proc/sys/net/ipv4/tcp_fin_timeout`" + `
sleep 10
systemctl start docker
Expand All @@ -61,3 +79,4 @@ if healthcheck; then
fi
echo "docker still failing"
`
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ var DockerApache2License = `
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
https://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
Expand Down Expand Up @@ -195,24 +195,13 @@ var DockerApache2License = `
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Copyright 2013-2018 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
Expand Down
119 changes: 105 additions & 14 deletions nodeup/pkg/model/tests/dockerbuilder/docker_1.12.1/tasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,80 @@ contents: |-
path: /etc/sysconfig/docker
type: file
---
contents: |
#!/bin/bash
# Copyright 2019 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is intended to be run periodically, to check the health
# of docker. If it detects a failure, it will restart docker using systemctl.
healthcheck() {
if output=`timeout 60 docker network ls`; then
echo "$output" | fgrep -qw host || {
echo "docker 'host' network missing"
return 1
}
else
echo "docker returned $?"
return 1
fi
}
if healthcheck; then
echo "docker healthy"
exit 0
fi
echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30
if healthcheck; then
echo "docker recovered"
exit 0
fi
echo "docker still unresponsive; triggering docker restart"
systemctl stop docker
echo "wait all tcp sockets to close"
sleep `cat /proc/sys/net/ipv4/tcp_fin_timeout`
sleep 10
systemctl start docker
echo "Waiting 120 seconds to give docker time to start"
sleep 60
if healthcheck; then
echo "docker recovered"
exit 0
fi
echo "docker still failing"
mode: "0755"
path: /opt/kops/bin/docker-healthcheck
type: file
---
contents: |2
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
https://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
Expand Down Expand Up @@ -184,24 +252,13 @@ contents: |2
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Copyright 2013-2018 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
Expand All @@ -217,6 +274,40 @@ preventStart: true
source: http://apt.dockerproject.org/repo/pool/main/d/docker-engine/docker-engine_1.12.1-0~xenial_amd64.deb
version: 1.12.1-0~xenial
---
Name: docker-healthcheck.service
definition: |
[Unit]
Description=Run docker-healthcheck once
Documentation=https://kops.sigs.k8s.io
[Service]
Type=oneshot
ExecStart=/opt/kops/bin/docker-healthcheck
[Install]
WantedBy=multi-user.target
enabled: true
manageState: true
running: true
smartRestart: true
---
Name: docker-healthcheck.timer
definition: |
[Unit]
Description=Trigger docker-healthcheck periodically
Documentation=https://kops.sigs.k8s.io
[Timer]
OnUnitInactiveSec=10s
Unit=docker-healthcheck.service
[Install]
WantedBy=multi-user.target
enabled: true
manageState: true
running: true
smartRestart: true
---
Name: docker.service
definition: |
[Unit]
Expand Down
Loading

0 comments on commit ba1d87e

Please sign in to comment.