[SPARK-19755][Mesos] Blacklist is always active for MesosCoarseGrainedSchedulerBackend #20640
MesosCoarseGrainedSchedulerBackend.scala

@@ -62,9 +62,6 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
   private lazy val hadoopDelegationTokenManager: MesosHadoopDelegationTokenManager =
     new MesosHadoopDelegationTokenManager(conf, sc.hadoopConfiguration, driverEndpoint)
 
-  // Blacklist a slave after this many failures
-  private val MAX_SLAVE_FAILURES = 2
-
   private val maxCoresOption = conf.getOption("spark.cores.max").map(_.toInt)
 
   private val executorCoresOption = conf.getOption("spark.executor.cores").map(_.toInt)
@@ -571,7 +568,11 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
       cpus + totalCoresAcquired <= maxCores &&
       mem <= offerMem &&
       numExecutors < executorLimit &&
-      slaves.get(slaveId).map(_.taskFailures).getOrElse(0) < MAX_SLAVE_FAILURES &&
+      // nodeBlacklist() currently only gets updated based on failures in spark tasks.
+      // If a mesos task fails to even start -- that is,
+      // if a spark executor fails to launch on a node -- nodeBlacklist does not get updated
+      // see SPARK-24567 for details
+      !scheduler.nodeBlacklist().contains(offerHostname) &&
       meetsPortRequirements &&
       satisfiesLocality(offerHostname)
     }
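As the review discussion later in this page points out, `nodeBlacklist()` only contains hosts when Spark's blacklisting is enabled, which it is not by default. A minimal sketch, not part of this diff, of how an application would opt in so the new check actually declines offers: `spark.blacklist.enabled` and `spark.blacklist.application.maxFailedExecutorsPerNode` are the standard Spark 2.x blacklist settings, while the app name and master URL are made up.

```scala
import org.apache.spark.SparkConf

object BlacklistEnabledConf {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("mesos-blacklist-example")          // illustrative
      .setMaster("mesos://zk://zk1:2181/mesos")       // hypothetical Mesos master URL
      .set("spark.cores.max", "8")
      // Without this, TaskSchedulerImpl.nodeBlacklist() stays empty and no offer is declined.
      .set("spark.blacklist.enabled", "true")
      // Application-level node blacklisting kicks in after this many executors on a node
      // are blacklisted -- roughly the role the hardcoded MAX_SLAVE_FAILURES = 2 used to play.
      .set("spark.blacklist.application.maxFailedExecutorsPerNode", "2")
    println(conf.toDebugString)
  }
}
```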
@@ -648,14 +649,8 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
           totalGpusAcquired -= gpus
           gpusByTaskId -= taskId
         }
-        // If it was a failure, mark the slave as failed for blacklisting purposes
         if (TaskState.isFailed(state)) {
-          slave.taskFailures += 1
-
-          if (slave.taskFailures >= MAX_SLAVE_FAILURES) {
-            logInfo(s"Blacklisting Mesos slave $slaveId due to too many failures; " +
-              "is Spark installed on it?")
-          }
+          logError(s"Mesos task $taskId failed on Mesos slave $slaveId.")
         }
         executorTerminated(d, slaveId, taskId, s"Executor finished with state $state")
         // In case we'd rejected everything before but have now lost a node

Review thread on the removed logInfo:

Is it a concern to lose this error message? (I don't know anything about Mesos, but it does seem potentially useful.)

@kayousterhout BlacklistTracker has its own logging for blacklisted nodes; won't that be enough? On the other hand, if blacklisting is disabled, which is the default, we lose this information.

@IgorBerman Yes, in the default case it would be nice to keep this information about a task failing, especially if it fails repeatedly.
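The thread above asks whether losing the "is Spark installed on it?" hint matters when blacklisting is disabled. A hypothetical follow-up sketch, not part of this PR: a purely informational per-slave failure counter that keeps the hint in the driver log without declining any offers. The class name, the threshold, and the idea of calling it from statusUpdate() are all assumptions.

```scala
import scala.collection.mutable
import org.slf4j.LoggerFactory

// Hypothetical helper: counts failed Mesos tasks per slave purely for logging.
class SlaveFailureLogger(threshold: Int = 2) {
  private val log = LoggerFactory.getLogger(classOf[SlaveFailureLogger])
  private val failures = mutable.HashMap[String, Int]().withDefaultValue(0)

  // Would be called from statusUpdate() when TaskState.isFailed(state);
  // it only logs and never influences offer acceptance.
  def onTaskFailed(slaveId: String): Unit = {
    failures(slaveId) += 1
    if (failures(slaveId) >= threshold) {
      log.warn(s"Mesos slave $slaveId has had ${failures(slaveId)} failed tasks; " +
        "is Spark installed on it?")
    }
  }
}
```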
@@ -798,7 +793,6 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
 
   private class Slave(val hostname: String) {
     val taskIDs = new mutable.HashSet[String]()
-    var taskFailures = 0
     var shuffleRegistered = false
   }
MesosCoarseGrainedSchedulerBackendSuite.scala

@@ -108,6 +108,28 @@ class MesosCoarseGrainedSchedulerBackendSuite extends SparkFunSuite
     verifyTaskLaunched(driver, "o2")
   }
 
+  test("mesos declines offers from blacklisted slave") {
+    setBackend()
+
+    // launches a task on a valid offer on slave s1
+    val minMem = backend.executorMemory(sc) + 1024
+    val minCpu = 4
+    val offer1 = Resources(minMem, minCpu)
+    offerResources(List(offer1))
+    verifyTaskLaunched(driver, "o1")
+
+    // for any reason the executor (aka mesos task) failed on s1
+    val status = createTaskStatus("0", "s1", TaskState.TASK_FAILED)
+    backend.statusUpdate(driver, status)
+    when(taskScheduler.nodeBlacklist()).thenReturn(Set("hosts1"))
+
+    val offer2 = Resources(minMem, minCpu)
+    // Offer resources from the same slave
+    offerResources(List(offer2))
+    // but since it's blacklisted the offer is declined
+    verifyDeclinedOffer(driver, createOfferId("o1"))
+  }
+
   test("mesos supports spark.executor.cores") {
     val executorCores = 4
     setBackend(Map("spark.executor.cores" -> executorCores.toString))

Review comment on the nodeBlacklist() stub:

Just to reiterate my point above: in many cases, having an executor fail will not lead to nodeBlacklist() being updated.

Review comments on verifyDeclinedOffer:

Will this actually pass? I thought it wouldn't, because the filtering is done inside …

Ah, never mind. I took another look at the code and now I see how this works.
@@ -790,6 +812,7 @@ class MesosCoarseGrainedSchedulerBackendSuite extends SparkFunSuite
 
     taskScheduler = mock[TaskSchedulerImpl]
     when(taskScheduler.sc).thenReturn(sc)
+    when(taskScheduler.nodeBlacklist()).thenReturn(Set[String]())
 
     externalShuffleClient = mock[MesosExternalShuffleClient]
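A standalone sketch of why this default stub belongs in the shared mock setup: an un-stubbed Mockito mock returns null for `nodeBlacklist()`, so the backend's new `contains()` check would throw a NullPointerException in every existing test. The `SchedulerLike` trait is a stand-in for `TaskSchedulerImpl`, used only to keep the sketch self-contained.

```scala
import org.mockito.Mockito.{mock, when}

// Stand-in for TaskSchedulerImpl, just to make the sketch self-contained.
trait SchedulerLike {
  def nodeBlacklist(): Set[String]
}

object DefaultStubSketch extends App {
  val scheduler = mock(classOf[SchedulerLike])

  // Without this stub, nodeBlacklist() returns null and .contains(...) would NPE.
  when(scheduler.nodeBlacklist()).thenReturn(Set.empty[String])

  assert(!scheduler.nodeBlacklist().contains("s1"))
  println("blacklist check is safe with the default stub")
}
```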
Review comments:

I just want to make really sure everybody understands the big change in behavior here: nodeBlacklist() currently only gets updated based on failures in Spark tasks. If a Mesos task fails to even start -- that is, if a Spark executor fails to launch on a node -- nodeBlacklist() does not get updated. So you could have a node that is misconfigured somehow, and after this change you might end up repeatedly trying to launch executors on it, with the executor failing to start each time. That holds even if you have blacklisting on. This is SPARK-16630 for the non-Mesos case. That is being actively worked on now; however, the work there will probably have to be YARN-specific, so there will still be follow-up work to get the same behavior for Mesos after that is in.

@squito sounds reasonable. In the meantime we have to deal with a limitation on the Mesos side, where the value is hardcoded, so we can move on this incrementally.

Maybe comment on this in the code here and add a JIRA for tracking?

This check looks a little late. Can we decline faster, without calculating everything?
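One way to read this last suggestion, as a standalone sketch with made-up names (not what the PR implements): because Scala's `&&` short-circuits, moving the blacklist membership test ahead of the resource checks means the per-offer resource math is never evaluated for offers from blacklisted hosts.

```scala
// Standalone demonstration of short-circuiting the offer check; all names are illustrative.
object DeclineFastSketch {
  def main(args: Array[String]): Unit = {
    val nodeBlacklist = Set("bad-host")

    def expensiveResourceFit(host: String): Boolean = {
      println(s"computing resource fit for $host")   // only reached for non-blacklisted hosts
      true
    }

    // Blacklist test first: blacklisted offers are declined without any resource math.
    def acceptOffer(host: String): Boolean =
      !nodeBlacklist.contains(host) && expensiveResourceFit(host)

    println(acceptOffer("bad-host"))   // false, and "computing resource fit" is never printed
    println(acceptOffer("good-host"))  // prints the message, then true
  }
}
```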