From d3f52dbf218d1daf8701c9e0fcf1761ad1c84cf2 Mon Sep 17 00:00:00 2001 From: George Wu Date: Wed, 14 Jun 2023 10:51:48 -0400 Subject: [PATCH] Fix issues when startup timeout is hit --- .../apache/druid/k8s/overlord/KubernetesPeonLifecycle.java | 5 +++++ .../druid/k8s/overlord/KubernetesPeonLifecycleTest.java | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/extensions-contrib/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesPeonLifecycle.java b/extensions-contrib/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesPeonLifecycle.java index f90f12048551..be8710e0dadc 100644 --- a/extensions-contrib/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesPeonLifecycle.java +++ b/extensions-contrib/kubernetes-overlord-extensions/src/main/java/org/apache/druid/k8s/overlord/KubernetesPeonLifecycle.java @@ -124,6 +124,11 @@ protected synchronized TaskStatus run(Job job, long launchTimeout, long timeout) return join(timeout); } + catch (Exception e) { + log.info("Failed to run task: %s", taskId.getOriginalTaskId()); + shutdown(); + throw e; + } finally { state.set(State.STOPPED); } diff --git a/extensions-contrib/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesPeonLifecycleTest.java b/extensions-contrib/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesPeonLifecycleTest.java index 253e49c5205e..7035e705985e 100644 --- a/extensions-contrib/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesPeonLifecycleTest.java +++ b/extensions-contrib/kubernetes-overlord-extensions/src/test/java/org/apache/druid/k8s/overlord/KubernetesPeonLifecycleTest.java @@ -163,7 +163,9 @@ protected synchronized TaskStatus join(long timeout) EasyMock.anyLong(), EasyMock.eq(TimeUnit.MILLISECONDS) )).andReturn(null); - + EasyMock.expect(kubernetesClient.deletePeonJob( + new K8sTaskId(ID) + )).andReturn(true); Assert.assertEquals(KubernetesPeonLifecycle.State.NOT_STARTED, peonLifecycle.getState()); replayAll();