From 9f80765a448ba9612c40b3b6ecdd09b6e1c22d16 Mon Sep 17 00:00:00 2001 From: mackenzie-oa <98547235+mackenzie-oa@users.noreply.github.com> Date: Mon, 25 Jul 2022 11:04:47 -0300 Subject: [PATCH] Bugfix: sweep-pod.sh never deleting certain Failed pods (#14925) (#14931) --- charts/airbyte-pod-sweeper/files/sweep-pod.sh | 5 +++-- kube/resources/pod-sweeper.yaml | 7 +++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/charts/airbyte-pod-sweeper/files/sweep-pod.sh b/charts/airbyte-pod-sweeper/files/sweep-pod.sh index 1d5e56c42c74..a6b343c7bb9b 100644 --- a/charts/airbyte-pod-sweeper/files/sweep-pod.sh +++ b/charts/airbyte-pod-sweeper/files/sweep-pod.sh @@ -3,7 +3,7 @@ get_worker_pods () { kubectl -n ${KUBE_NAMESPACE} -L airbyte -l airbyte=worker-pod \ --field-selector status.phase!=Running get pods \ - -o=jsonpath='{range .items[*]} {.metadata.name} {.status.phase} {.status.conditions[0].lastTransitionTime}{"\n"}{end}' + -o=jsonpath='{range .items[*]} {.metadata.name} {.status.phase} {.status.conditions[0].lastTransitionTime} {.status.startTime}{"\n"}{end}' } delete_worker_pod() { @@ -26,7 +26,8 @@ do POD_NAME=`echo $POD | cut -d " " -f 1` POD_STATUS=`echo $POD | cut -d " " -f 2` POD_DATE_STR=`echo $POD | cut -d " " -f 3` - POD_DATE=`date -d $POD_DATE_STR '+%s'` + POD_START_DATE_STR=`echo $POD | cut -d " " -f 4` + POD_DATE=`date -d ${POD_DATE_STR:-$POD_START_DATE_STR} '+%s'` if [ "$POD_STATUS" = "Succeeded" ]; then if [ "$POD_DATE" -lt "$SUCCESS_DATE" ]; then delete_worker_pod "$POD_NAME" "$POD_STATUS" "$POD_DATE_STR" diff --git a/kube/resources/pod-sweeper.yaml b/kube/resources/pod-sweeper.yaml index 77dd0a2d34d5..b2a35b3bc461 100644 --- a/kube/resources/pod-sweeper.yaml +++ b/kube/resources/pod-sweeper.yaml @@ -9,7 +9,7 @@ data: get_worker_pods () { kubectl -n ${KUBE_NAMESPACE} -L airbyte -l airbyte=worker-pod \ --field-selector status.phase!=Running get pods \ - -o=jsonpath='{range .items[*]} {.metadata.name} {.status.phase} {.status.conditions[0].lastTransitionTime}{"\n"}{end}' + -o=jsonpath='{range .items[*]} {.metadata.name} {.status.phase} {.status.conditions[0].lastTransitionTime} {.status.startTime}{"\n"}{end}' } delete_worker_pod() { @@ -25,8 +25,6 @@ data: # Longer time window for pods in error (to debug) NON_SUCCESS_DATE_STR=`date -d 'now - 24 hours' --utc -Ins` NON_SUCCESS_DATE=`date -d $NON_SUCCESS_DATE_STR +%s` - # default time to use in case its unavailable from kubectl - DEFAULT=`date --utc -Ins` ( IFS=$'\n' for POD in `get_worker_pods`; do @@ -34,7 +32,8 @@ data: POD_NAME=`echo $POD | cut -d " " -f 1` POD_STATUS=`echo $POD | cut -d " " -f 2` POD_DATE_STR=`echo $POD | cut -d " " -f 3` - POD_DATE=`date -d ${POD_DATE_STR:-$DEFAULT} '+%s'` + POD_START_DATE_STR=`echo $POD | cut -d " " -f 4` + POD_DATE=`date -d ${POD_DATE_STR:-$POD_START_DATE_STR} '+%s'` if [ "$POD_STATUS" = "Succeeded" ]; then if [ "$POD_DATE" -lt "$SUCCESS_DATE" ]; then delete_worker_pod "$POD_NAME" "$POD_STATUS" "$POD_DATE_STR"