Skip to content

Commit

Permalink
[SPARK-2147 / 2161] Show removed executors on the UI
Browse files Browse the repository at this point in the history
This PR includes two changes
- **[SPARK-2147]** When an application finishes cleanly (i.e. `sc.stop()` is called), all of its executors used to disappear from the Master UI. This no longer happens.
- **[SPARK-2161]** This adds a "Removed Executors" table to Master UI, so the user can find out why their executors died from the logs, for instance. The equivalent table already existed in the Worker UI, but was hidden because of a bug (the comment `//scalastyle:off` disconnected the `Seq[Node]` that represents the HTML for table).

This should go into 1.0.1 if possible.

Author: Andrew Or <[email protected]>

Closes #1102 from andrewor14/remember-removed-executors and squashes the following commits:

2e2298f [Andrew Or] Add hash code method to ExecutorInfo (minor)
abd72e0 [Andrew Or] Merge branch 'master' of github.com:apache/spark into remember-removed-executors
792f992 [Andrew Or] Add missing equals method in ExecutorInfo
3390b49 [Andrew Or] Add executor state column to WorkerPage
161f8a2 [Andrew Or] Display finished executors table (fix bug)
fbb65b8 [Andrew Or] Removed unused method
c89bb6e [Andrew Or] Add table for removed executors in MasterWebUI
fe47402 [Andrew Or] Show exited executors on the Master UI

(cherry picked from commit a14807e)
Signed-off-by: Aaron Davidson <[email protected]>
  • Loading branch information
andrewor14 authored and aarondav committed Jun 17, 2014
1 parent 7f5df6a commit e6c9058
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package org.apache.spark.deploy.master
import java.util.Date

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import akka.actor.ActorRef

Expand All @@ -36,6 +37,7 @@ private[spark] class ApplicationInfo(

@transient var state: ApplicationState.Value = _
@transient var executors: mutable.HashMap[Int, ExecutorInfo] = _
@transient var removedExecutors: ArrayBuffer[ExecutorInfo] = _
@transient var coresGranted: Int = _
@transient var endTime: Long = _
@transient var appSource: ApplicationSource = _
Expand All @@ -51,6 +53,7 @@ private[spark] class ApplicationInfo(
endTime = -1L
appSource = new ApplicationSource(this)
nextExecutorId = 0
removedExecutors = new ArrayBuffer[ExecutorInfo]
}

private def newExecutorId(useID: Option[Int] = None): Int = {
Expand All @@ -74,6 +77,7 @@ private[spark] class ApplicationInfo(

def removeExecutor(exec: ExecutorInfo) {
if (executors.contains(exec.id)) {
removedExecutors += executors(exec.id)
executors -= exec.id
coresGranted -= exec.cores
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,19 @@ private[spark] class ExecutorInfo(
}

def fullId: String = application.id + "/" + id

override def equals(other: Any): Boolean = {
other match {
case info: ExecutorInfo =>
fullId == info.fullId &&
worker.id == info.worker.id &&
cores == info.cores &&
memory == info.memory
case _ => false
}
}

override def toString: String = fullId

override def hashCode: Int = toString.hashCode()
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import scala.xml.Node
import akka.pattern.ask
import org.json4s.JValue

import org.apache.spark.deploy.JsonProtocol
import org.apache.spark.deploy.{ExecutorState, JsonProtocol}
import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState}
import org.apache.spark.deploy.master.ExecutorInfo
import org.apache.spark.ui.{WebUIPage, UIUtils}
Expand Down Expand Up @@ -57,43 +57,55 @@ private[spark] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app
})

val executorHeaders = Seq("ExecutorID", "Worker", "Cores", "Memory", "State", "Logs")
val executors = app.executors.values.toSeq
val executorTable = UIUtils.listingTable(executorHeaders, executorRow, executors)
val allExecutors = (app.executors.values ++ app.removedExecutors).toSet.toSeq
// This includes executors that are either still running or have exited cleanly
val executors = allExecutors.filter { exec =>
!ExecutorState.isFinished(exec.state) || exec.state == ExecutorState.EXITED
}
val removedExecutors = allExecutors.diff(executors)
val executorsTable = UIUtils.listingTable(executorHeaders, executorRow, executors)
val removedExecutorsTable = UIUtils.listingTable(executorHeaders, executorRow, removedExecutors)

val content =
<div class="row-fluid">
<div class="span12">
<ul class="unstyled">
<li><strong>ID:</strong> {app.id}</li>
<li><strong>Name:</strong> {app.desc.name}</li>
<li><strong>User:</strong> {app.desc.user}</li>
<li><strong>Cores:</strong>
{
if (app.desc.maxCores.isEmpty) {
"Unlimited (%s granted)".format(app.coresGranted)
} else {
"%s (%s granted, %s left)".format(
app.desc.maxCores.get, app.coresGranted, app.coresLeft)
}
}
</li>
<li>
<strong>Executor Memory:</strong>
{Utils.megabytesToString(app.desc.memoryPerSlave)}
</li>
<li><strong>Submit Date:</strong> {app.submitDate}</li>
<li><strong>State:</strong> {app.state}</li>
<li><strong><a href={app.desc.appUiUrl}>Application Detail UI</a></strong></li>
</ul>
</div>
<div class="row-fluid">
<div class="span12">
<ul class="unstyled">
<li><strong>ID:</strong> {app.id}</li>
<li><strong>Name:</strong> {app.desc.name}</li>
<li><strong>User:</strong> {app.desc.user}</li>
<li><strong>Cores:</strong>
{
if (app.desc.maxCores.isEmpty) {
"Unlimited (%s granted)".format(app.coresGranted)
} else {
"%s (%s granted, %s left)".format(
app.desc.maxCores.get, app.coresGranted, app.coresLeft)
}
}
</li>
<li>
<strong>Executor Memory:</strong>
{Utils.megabytesToString(app.desc.memoryPerSlave)}
</li>
<li><strong>Submit Date:</strong> {app.submitDate}</li>
<li><strong>State:</strong> {app.state}</li>
<li><strong><a href={app.desc.appUiUrl}>Application Detail UI</a></strong></li>
</ul>
</div>
</div>

<div class="row-fluid"> <!-- Executors -->
<div class="span12">
<h4> Executor Summary </h4>
{executorTable}
</div>
</div>;
<div class="row-fluid"> <!-- Executors -->
<div class="span12">
<h4> Executor Summary </h4>
{executorsTable}
{
if (removedExecutors.nonEmpty) {
<h4> Removed Executors </h4> ++
removedExecutorsTable
}
}
</div>
</div>;
UIUtils.basicSparkPage(content, "Application: " + app.desc.name)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,74 +46,62 @@ private[spark] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") {
val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse]
val workerState = Await.result(stateFuture, timeout)

val executorHeaders = Seq("ExecutorID", "Cores", "Memory", "Job Details", "Logs")
val executorHeaders = Seq("ExecutorID", "Cores", "State", "Memory", "Job Details", "Logs")
val runningExecutors = workerState.executors
val runningExecutorTable =
UIUtils.listingTable(executorHeaders, executorRow, workerState.executors)
UIUtils.listingTable(executorHeaders, executorRow, runningExecutors)
val finishedExecutors = workerState.finishedExecutors
val finishedExecutorTable =
UIUtils.listingTable(executorHeaders, executorRow, workerState.finishedExecutors)
UIUtils.listingTable(executorHeaders, executorRow, finishedExecutors)

val driverHeaders = Seq("DriverID", "Main Class", "State", "Cores", "Memory", "Logs", "Notes")
val runningDrivers = workerState.drivers.sortBy(_.driverId).reverse
val runningDriverTable = UIUtils.listingTable(driverHeaders, driverRow, runningDrivers)
val finishedDrivers = workerState.finishedDrivers.sortBy(_.driverId).reverse
def finishedDriverTable = UIUtils.listingTable(driverHeaders, driverRow, finishedDrivers)
val finishedDriverTable = UIUtils.listingTable(driverHeaders, driverRow, finishedDrivers)

// For now we only show driver information if the user has submitted drivers to the cluster.
// This is until we integrate the notion of drivers and applications in the UI.
def hasDrivers = runningDrivers.length > 0 || finishedDrivers.length > 0

val content =
<div class="row-fluid"> <!-- Worker Details -->
<div class="span12">
<ul class="unstyled">
<li><strong>ID:</strong> {workerState.workerId}</li>
<li><strong>
Master URL:</strong> {workerState.masterUrl}
</li>
<li><strong>Cores:</strong> {workerState.cores} ({workerState.coresUsed} Used)</li>
<li><strong>Memory:</strong> {Utils.megabytesToString(workerState.memory)}
({Utils.megabytesToString(workerState.memoryUsed)} Used)</li>
</ul>
<p><a href={workerState.masterWebUiUrl}>Back to Master</a></p>
</div>
<div class="row-fluid"> <!-- Worker Details -->
<div class="span12">
<ul class="unstyled">
<li><strong>ID:</strong> {workerState.workerId}</li>
<li><strong>
Master URL:</strong> {workerState.masterUrl}
</li>
<li><strong>Cores:</strong> {workerState.cores} ({workerState.coresUsed} Used)</li>
<li><strong>Memory:</strong> {Utils.megabytesToString(workerState.memory)}
({Utils.megabytesToString(workerState.memoryUsed)} Used)</li>
</ul>
<p><a href={workerState.masterWebUiUrl}>Back to Master</a></p>
</div>

<div class="row-fluid"> <!-- Running Executors -->
<div class="span12">
<h4> Running Executors {workerState.executors.size} </h4>
{runningExecutorTable}
</div>
</div>
// scalastyle:off
<div>
{if (hasDrivers)
<div class="row-fluid"> <!-- Running Drivers -->
<div class="span12">
<h4> Running Drivers {workerState.drivers.size} </h4>
{runningDriverTable}
</div>
</div>
</div>
<div class="row-fluid"> <!-- Executors and Drivers -->
<div class="span12">
<h4> Running Executors ({runningExecutors.size}) </h4>
{runningExecutorTable}
{
if (runningDrivers.nonEmpty) {
<h4> Running Drivers ({runningDrivers.size}) </h4> ++
runningDriverTable
}
}
</div>

<div class="row-fluid"> <!-- Finished Executors -->
<div class="span12">
<h4> Finished Executors </h4>
{finishedExecutorTable}
</div>
</div>

<div>
{if (hasDrivers)
<div class="row-fluid"> <!-- Finished Drivers -->
<div class="span12">
<h4> Finished Drivers </h4>
{finishedDriverTable}
</div>
</div>
{
if (finishedExecutors.nonEmpty) {
<h4>Finished Executors ({finishedExecutors.size}) </h4> ++
finishedExecutorTable
}
}
</div>;
// scalastyle:on
{
if (finishedDrivers.nonEmpty) {
<h4> Finished Drivers ({finishedDrivers.size}) </h4> ++
finishedDriverTable
}
}
</div>
</div>;
UIUtils.basicSparkPage(content, "Spark Worker at %s:%s".format(
workerState.host, workerState.port))
}
Expand All @@ -122,6 +110,7 @@ private[spark] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") {
<tr>
<td>{executor.execId}</td>
<td>{executor.cores}</td>
<td>{executor.state}</td>
<td sorttable_customkey={executor.memory.toString}>
{Utils.megabytesToString(executor.memory)}
</td>
Expand Down

0 comments on commit e6c9058

Please sign in to comment.