Skip to content

Commit

Permalink
Preference to first worker error in-case job fails with `TooManyAttem…
Browse files Browse the repository at this point in the history
…ptsForWorker` (#14170)
  • Loading branch information
cryptoe authored May 1, 2023
1 parent 90ea192 commit 078d5ac
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
import org.apache.druid.msq.indexing.error.MSQFault;
import org.apache.druid.msq.indexing.error.MSQFaultUtils;
import org.apache.druid.msq.indexing.error.QueryRuntimeFault;
import org.apache.druid.msq.indexing.error.TooManyAttemptsForJob;
import org.apache.druid.msq.indexing.error.TooManyAttemptsForWorker;
import org.apache.druid.msq.indexing.error.UnknownFault;
import org.apache.druid.msq.indexing.error.WorkerFailedFault;
import org.apache.druid.msq.indexing.error.WorkerRpcFailedFault;
Expand Down Expand Up @@ -201,7 +203,10 @@ static MSQErrorReport makeErrorReport(
// function, and it's best if helper functions run quietly.)
if (workerErrorReport != null && (controllerErrorReport.getFault() instanceof WorkerFailedFault
|| controllerErrorReport.getFault() instanceof WorkerRpcFailedFault
|| controllerErrorReport.getFault() instanceof CanceledFault)) {
|| controllerErrorReport.getFault() instanceof CanceledFault
|| controllerErrorReport.getFault() instanceof TooManyAttemptsForWorker
|| controllerErrorReport.getFault() instanceof TooManyAttemptsForJob)) {

return workerErrorReport;
} else {
return controllerErrorReport;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import org.apache.druid.msq.indexing.error.MSQException;
import org.apache.druid.msq.indexing.error.MSQFaultUtils;
import org.apache.druid.msq.indexing.error.TaskStartTimeoutFault;
import org.apache.druid.msq.indexing.error.TooManyAttemptsForJob;
import org.apache.druid.msq.indexing.error.TooManyAttemptsForWorker;
import org.apache.druid.msq.indexing.error.TooManyColumnsFault;
import org.apache.druid.msq.indexing.error.TooManyWorkersFault;
import org.apache.druid.msq.indexing.error.UnknownFault;
Expand Down Expand Up @@ -141,6 +143,53 @@ public void test_makeErrorReport_workerPreferred()
);
}

@Test
public void test_makeErrorReport_controllerWithTooManyAttemptsForJob_workerPreferred()
{
final MSQErrorReport controllerReport = MSQTasks.makeErrorReport(
WORKER_ID,
WORKER_HOST,
MSQErrorReport.fromFault(WORKER_ID, WORKER_HOST, null, new TooManyAttemptsForJob(1, 1, "xxx", "xxx")),
null
);

final MSQErrorReport workerReport = MSQTasks.makeErrorReport(
WORKER_ID,
WORKER_HOST,
MSQErrorReport.fromFault(WORKER_ID, WORKER_HOST, null, new TooManyColumnsFault(1, 10)),
null
);

Assert.assertEquals(
workerReport,
MSQTasks.makeErrorReport(WORKER_ID, WORKER_HOST, controllerReport, workerReport)
);
}

@Test
public void test_makeErrorReport_controllerWithTooManyAttemptsForWorker_workerPreferred()
{
final MSQErrorReport controllerReport = MSQTasks.makeErrorReport(
WORKER_ID,
WORKER_HOST,
MSQErrorReport.fromFault(WORKER_ID, WORKER_HOST, null, new TooManyAttemptsForWorker(1, "xxx", 1, "xxx")),
null
);

final MSQErrorReport workerReport = MSQTasks.makeErrorReport(
WORKER_ID,
WORKER_HOST,
MSQErrorReport.fromFault(WORKER_ID, WORKER_HOST, null, new TooManyColumnsFault(1, 10)),
null
);

Assert.assertEquals(
workerReport,
MSQTasks.makeErrorReport(WORKER_ID, WORKER_HOST, controllerReport, workerReport)
);
}


@Test
public void test_getWorkerFromTaskId()
{
Expand Down

0 comments on commit 078d5ac

Please sign in to comment.