From 17183dddac000b4f0866636a6a39e18a2ea87188 Mon Sep 17 00:00:00 2001 From: Guillaume Demonet Date: Mon, 20 Apr 2020 15:02:22 +0200 Subject: [PATCH] scripts: Wait for a single Salt Master container Sometimes, if kubelet restarted the `salt-master` static Pod after an operation, two containers matching the usual selector will co-exist for a small time window. If we use the `scripts/common.sh:get_salt_container` function at that point in time, we may return a string with two container IDs instead of just one, and subsequent commands will fail. Instead, we now wait for a single container to exist (and also add a sleep time between two attempts, which we didn't before). Fixes: #2434 --- scripts/common.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/common.sh b/scripts/common.sh index 666f70d563..f7d13865b2 100644 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -313,13 +313,21 @@ pre_minion_checks() { get_salt_container() { local -r max_retries=10 local salt_container='' attempts=0 + local -a found_containers=() - while [ -z "$salt_container" ] && [ $attempts -lt $max_retries ]; do - salt_container="$(crictl ps -q \ + while [[ $attempts -lt $max_retries ]]; do + IFS=$'\n' read -r -d '' -a found_containers < <(crictl ps -q \ --label io.kubernetes.pod.namespace=kube-system \ --label io.kubernetes.container.name=salt-master \ - --state Running)" + --state Running && printf '\0') + + if [[ "${#found_containers[@]}" -eq 1 ]]; then + salt_container=${found_containers[0]} + break + fi + echo "Invalid number of candidates: ${#found_containers[@]}" >&2 (( attempts++ )) + sleep 3 done if [ -z "$salt_container" ]; then