diff --git a/build/images/scripts/install_cni_chaining b/build/images/scripts/install_cni_chaining index d7e7281ad43..8ef6a8aa5cb 100755 --- a/build/images/scripts/install_cni_chaining +++ b/build/images/scripts/install_cni_chaining @@ -47,9 +47,9 @@ while (( "$#" )); do shift done -# Find the cni conf file with lowest name +# Find the cni conf file with lowest name, which is not installed by us while true; do - cni_conf_name=$(ls "$HOST_CNI_NET_DIR" | head -n1) + cni_conf_name=$(ls "$HOST_CNI_NET_DIR" | grep -v antrea | head -n1) if [[ ! -z $cni_conf_name ]]; then break fi @@ -58,16 +58,22 @@ while true; do done cni_conf_path="$HOST_CNI_NET_DIR/$cni_conf_name" +# EKS uses 10-aws.conflist +# AKS uses 10-azure.conflist +cni_conf_name_antrea="05-antrea.conflist" +cni_conf_path_antrea="$HOST_CNI_NET_DIR/$cni_conf_name_antrea" + +cni_conf_sha="" + function update_cni_conf { - log_info "install_cni_chaining" "updating CNI conf file $cni_conf_name" + log_info "install_cni_chaining" "updating CNI conf file $cni_conf_name -> $cni_conf_name_antrea" - # To limit the risk of issue because of race conditions, we use the - # following steps: - # 1. read the file once and store its contents in a variable + # We use the following steps: + # 1. read the input file once and store its contents in a variable # 2. perform the necessary changes on the variable contents - # 3. compute the sha of the updated variable contents - # 4. write the variable contents back to the file + # 3. write the variable contents to the output file if necessary content=$(cat $cni_conf_path) + cni_conf_sha="$(echo -n "$content" | sha256sum | while read -r s _; do echo "$s"; done)" echo "$content" | grep -sq "azure" if [[ $? == 0 ]]; then @@ -81,9 +87,13 @@ function update_cni_conf { content="$(echo "$content" | jq '.plugins += [{"type": "antrea"}]')" fi - cni_conf_sha="$(echo "$content" | sha256sum | while read -r s _; do echo "$s"; done)" - - echo "$content" > $cni_conf_path + # we only write to the file if an update is necessary + cmp -s <(echo "$content") $cni_conf_path_antrea + if [[ $? != 0 ]]; then + echo "$content" > $cni_conf_path_antrea + else + log_info "install_cni_chaining" "CNI conf file is already up-to-date" + fi } # monitor will start a watch on host's CNI config directory. diff --git a/build/yamls/antrea-aks-node-init.yml b/build/yamls/antrea-aks-node-init.yml index fcdd3a2485e..d53975c6153 100644 --- a/build/yamls/antrea-aks-node-init.yml +++ b/build/yamls/antrea-aks-node-init.yml @@ -21,7 +21,7 @@ spec: hostNetwork: true containers: - name: node-init - image: gcr.io/google-containers/startup-script:v1 + image: gcr.io/google-containers/startup-script:v2 imagePullPolicy: IfNotPresent securityContext: privileged: true diff --git a/build/yamls/antrea-eks-node-init.yml b/build/yamls/antrea-eks-node-init.yml index e894f4e395f..5b84c864b64 100644 --- a/build/yamls/antrea-eks-node-init.yml +++ b/build/yamls/antrea-eks-node-init.yml @@ -42,28 +42,12 @@ spec: set -o pipefail set -o nounset - # The STARTUP_SCRIPT environment variable (which is set to the contents of this - # script) will be available when the script is executed :) - checkpoint_path="/opt/cni/antrea-node-init-status-$(md5sum <<<"${STARTUP_SCRIPT}" | cut -c-32)" - - if [ -f "$checkpoint_path" ]; then - echo "Antrea node init already done. Exiting" - exit - fi - - echo "Initializing node for Antrea" + echo "Initializing Node for Antrea" + cni_conf="/etc/cni/net.d/05-antrea.conflist" while true; do - cni_conf=$(ls /etc/cni/net.d | head -n1) - if [[ ! -z $cni_conf ]]; then break; fi - echo "Waiting for cni conf file" - sleep 2s - done - cni_conf="/etc/cni/net.d/$cni_conf" - - while true; do - if grep -sq "antrea" $cni_conf; then break; fi - echo "Waiting for antrea config to be updated" + if [[ -f $cni_conf ]]; then break; fi + echo "Waiting for Antrea CNI conf file" sleep 2s done @@ -72,19 +56,20 @@ spec: test -e /var/run/docker.sock || container_runtime="containerd" echo "Container runtime: $container_runtime" - # Wait for kubelet to register the file update. Default sync time is 5sec + # Wait for kubelet to register the file update. + # Default sync time is 5s so we sleep for 10s. # https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/dockershim/network/cni/cni.go#L50 - sleep 5s + sleep 10s while true; do - curl localhost:61679 && retry=false || retry=true + curl -sS -o /dev/null localhost:61679 && retry=false || retry=true if [ $retry == false ]; then break ; fi sleep 2s echo "Waiting for aws-k8s-agent" done # Fetch running containers from aws-k8s-agent and kill them - echo "\n" + echo for container_id in $(cat /var/run/aws-node/ipam.json | jq -r '.allocations | .[] | .containerID'); do echo "Restarting container with ID: ${container_id}" if [[ "$container_runtime" == "docker" ]]; then @@ -94,12 +79,8 @@ spec: fi done - # Save the Node init status, to avoid container restart in case of node-init Pod - # restart or worker Node reboot. - # Note that gcr.io/google-containers/startup-script:v2 also includes a similar - # mechanism but it doesn't prevent the script from being run again when the Node - # restarts, since the checkpoint path is located in the /tmp folder. + # The script will run again if the contents of the script change. + # It may also run again in case of worker Node reboot, assuming + # that the contents of /tmp are deleted. # See https://github.com/kubernetes-retired/contrib/blob/master/startup-script/manage-startup-script.sh. - touch "$checkpoint_path" - echo "Node initialization completed" diff --git a/build/yamls/antrea-gke-node-init.yml b/build/yamls/antrea-gke-node-init.yml index 1919906639d..2c68fda5caa 100644 --- a/build/yamls/antrea-gke-node-init.yml +++ b/build/yamls/antrea-gke-node-init.yml @@ -21,7 +21,7 @@ spec: hostNetwork: true containers: - name: node-init - image: gcr.io/google-containers/startup-script:v1 + image: gcr.io/google-containers/startup-script:v2 imagePullPolicy: IfNotPresent securityContext: privileged: true