Adding Json serialization and responding to Reynold's feedback
pwendell committed Jul 7, 2014
1 parent ad85076 commit 7a63abc
Showing 9 changed files with 58 additions and 22 deletions.
4 changes: 2 additions & 2 deletions core/src/main/scala/org/apache/spark/Accumulators.scala
@@ -226,8 +226,8 @@ GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Serializa
* @param param helper object defining how to add elements of type `T`
* @tparam T result type
*/
-class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T], _name: String, _display: Boolean)
-extends Accumulable[T,T](initialValue, param) {
+class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T], _name: String,
+_display: Boolean) extends Accumulable[T,T](initialValue, param) {
override def name = if (_name.eq(null)) s"accumulator_$id" else _name
override def display = _display
def this(initialValue: T, param: AccumulatorParam[T]) = this(initialValue, param, null, true)
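A minimal sketch of what the widened constructor means for callers, assuming a live SparkContext `sc`; the accumulator names below are invented for illustration:

// Illustrative only: an accumulator built without a name falls back to the generated
// "accumulator_<id>", while one built with a name (as HadoopRDD does below) reports that name.
val unnamed = sc.accumulator(0)                                                   // name resolves to s"accumulator_$id"
val named = sc.accumulator(0L, "bytes read")(SparkContext.LongAccumulatorParam)   // name resolves to "bytes read"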
3 changes: 2 additions & 1 deletion core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -119,7 +119,8 @@ class HadoopRDD[K, V](
minPartitions)
}

-val hadoopInputBytes = sc.accumulator(0L, s"rdd-$id.input.bytes.hadoop")(SparkContext.LongAccumulatorParam)
+private val accName = s"rdd-$id.input.bytes.hadoop"
+val hadoopInputBytes = sc.accumulator(0L, accName)(SparkContext.LongAccumulatorParam)

protected val jobConfCacheKey = "rdd_%d_job_conf".format(id)

core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -824,8 +824,8 @@ class DAGScheduler(
if (partialValue != acc.zero) {
val stringPartialValue = s"${partialValue}"
val stringValue = s"${acc.value}"
-stageToInfos(stage).accumulatorValues(name) = stringValue
-event.taskInfo.accumValues += ((name, stringPartialValue))
+stageToInfos(stage).accumulatedValues(name) = stringValue
+event.taskInfo.accumulableValues += ((name, stringPartialValue))
}
}
}
core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala
@@ -41,7 +41,7 @@ class StageInfo(
/** If the stage failed, the reason why. */
var failureReason: Option[String] = None
/** Terminal values of accumulables updated during this stage. */
-val accumulatorValues: Map[String, String] = HashMap[String, String]()
+val accumulatedValues: Map[String, String] = HashMap[String, String]()

def stageFailed(reason: String) {
failureReason = Some(reason)
5 changes: 3 additions & 2 deletions core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala
@@ -44,9 +44,10 @@ class TaskInfo(
var gettingResultTime: Long = 0

/**
-* Terminal values of accumulables updated during this task.
+* Intermediate updates to accumulables during this task. Note that it is valid for the same
+* accumulable to be updated multiple times in a single task.
*/
-val accumValues = ListBuffer[(String, String)]()
+val accumulableValues = ListBuffer[(String, String)]()

/**
* The time when the task has completed successfully (including the time to remotely fetch
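The reworded comment is the substantive part: `accumulableValues` is an append-only log of per-task updates, so the same name can legitimately appear more than once. A small sketch under that reading; the constructor arguments and values are dummies, not taken from the diff:

import org.apache.spark.scheduler.{TaskInfo, TaskLocality}

// Dummy task: taskId = 1, index = 0, attempt = 0, launchTime = 0
val info = new TaskInfo(1L, 0, 0, 0L, "executor-1", "host-1", TaskLocality.NODE_LOCAL, false)
info.accumulableValues += (("acc1", "10"))
info.accumulableValues += (("acc1", "15"))   // same accumulable again: both entries are kept
info.accumulableValues += (("acc2", "7"))
// info.accumulableValues == ListBuffer(("acc1", "10"), ("acc1", "15"), ("acc2", "7"))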
core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
@@ -76,7 +76,7 @@ class JobProgressListener(conf: SparkConf) extends SparkListener {
poolToActiveStages(stageIdToPool(stageId)).remove(stageId)

val accumulables = stageIdToAccumulables.getOrElseUpdate(stageId, HashMap[String, String]())
-stageCompleted.stageInfo.accumulatorValues.foreach { case (name, value) =>
+stageCompleted.stageInfo.accumulatedValues.foreach { case (name, value) =>
accumulables(name) = value
}

@@ -156,7 +156,7 @@ class JobProgressListener(conf: SparkConf) extends SparkListener {

if (info != null) {
val accumulables = stageIdToAccumulables.getOrElseUpdate(sid, HashMap[String, String]())
-info.accumValues.map { case (name, value) =>
+info.accumulableValues.map { case (name, value) =>
accumulables(name) = value
}

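Both call sites fold the incoming pairs into a per-stage mutable map, so for a given accumulable name the last value written wins. A standalone sketch of that fold, with invented names and values:

import scala.collection.mutable.HashMap

val accumulables = HashMap[String, String]()
// Two updates for "acc1" arrive from successive task-end events; the later one overwrites.
Seq(("acc1", "10"), ("acc1", "15"), ("acc2", "7")).foreach { case (name, value) =>
  accumulables(name) = value
}
// accumulables == Map("acc1" -> "15", "acc2" -> "7")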
5 changes: 3 additions & 2 deletions core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala
@@ -105,7 +105,8 @@ private[ui] class StagePage(parent: JobProgressTab) extends WebUIPage("stage") {
// scalastyle:on
val accumulableHeaders: Seq[String] = Seq("Accumulable", "Value")
def accumulableRow(acc: (String, String)) = <tr><td>{acc._1}</td><td>{acc._2}</td></tr>
-val accumulableTable = UIUtils.listingTable(accumulableHeaders, accumulableRow, accumulables.toSeq)
+val accumulableTable = UIUtils.listingTable(accumulableHeaders, accumulableRow,
+accumulables.toSeq)

val taskHeaders: Seq[String] =
Seq(
@@ -289,7 +290,7 @@
<td sorttable_customkey={gcTime.toString}>
{if (gcTime > 0) UIUtils.formatDuration(gcTime) else ""}
</td>
-<td>{Unparsed(info.accumValues.map{ case (k, v) => s"$k += $v" }.mkString("<br/>"))}</td>
+<td>{Unparsed(info.accumulableValues.map{ case (k, v) => s"$k: $v" }.mkString("<br/>"))}</td>
<!--
TODO: Add this back after we add support to hide certain columns.
<td sorttable_customkey={serializationTime.toString}>
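The rendering tweak above changes each task-table cell from "name += value" to "name: value", one line per update. Roughly, for a task with two invented updates:

// Illustrative rendering of the accumulables cell for one task:
val updates = Seq(("acc1", "10"), ("acc1", "15"))
val cell = updates.map { case (k, v) => s"$k: $v" }.mkString("<br/>")
// cell == "acc1: 10<br/>acc1: 15"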
22 changes: 20 additions & 2 deletions core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
@@ -190,10 +190,14 @@ private[spark] object JsonProtocol {
("Details" -> stageInfo.details) ~
("Submission Time" -> submissionTime) ~
("Completion Time" -> completionTime) ~
("Failure Reason" -> failureReason)
("Failure Reason" -> failureReason) ~
("Accumulated Values" -> mapToJson(stageInfo.accumulatedValues))
}

def taskInfoToJson(taskInfo: TaskInfo): JValue = {
+val accumUpdateMap = taskInfo.accumulableValues.map { case (k, v) =>
+mapToJson(Map(k -> v))
+}.toList
("Task ID" -> taskInfo.taskId) ~
("Index" -> taskInfo.index) ~
("Attempt" -> taskInfo.attempt) ~
@@ -204,7 +208,8 @@
("Speculative" -> taskInfo.speculative) ~
("Getting Result Time" -> taskInfo.gettingResultTime) ~
("Finish Time" -> taskInfo.finishTime) ~
("Failed" -> taskInfo.failed)
("Failed" -> taskInfo.failed) ~
("Accumulable Updates" -> JArray(accumUpdateMap))
}

def taskMetricsToJson(taskMetrics: TaskMetrics): JValue = {
@@ -485,11 +490,17 @@ private[spark] object JsonProtocol {
val submissionTime = Utils.jsonOption(json \ "Submission Time").map(_.extract[Long])
val completionTime = Utils.jsonOption(json \ "Completion Time").map(_.extract[Long])
val failureReason = Utils.jsonOption(json \ "Failure Reason").map(_.extract[String])
+val accumulatedValues = (json \ "Accumulated Values").extractOpt[JObject].map(mapFromJson(_))

val stageInfo = new StageInfo(stageId, stageName, numTasks, rddInfos, details)
stageInfo.submissionTime = submissionTime
stageInfo.completionTime = completionTime
stageInfo.failureReason = failureReason
+accumulatedValues.foreach { values =>
+for ((k, v) <- values) {
+stageInfo.accumulatedValues(k) = v
+}
+}
stageInfo
}

@@ -505,12 +516,19 @@
val gettingResultTime = (json \ "Getting Result Time").extract[Long]
val finishTime = (json \ "Finish Time").extract[Long]
val failed = (json \ "Failed").extract[Boolean]
+val accumulableUpdates = (json \ "Accumulable Updates").extractOpt[Seq[JValue]].map(
+updates => updates.map(mapFromJson(_)))

val taskInfo =
new TaskInfo(taskId, index, attempt, launchTime, executorId, host, taskLocality, speculative)
taskInfo.gettingResultTime = gettingResultTime
taskInfo.finishTime = finishTime
taskInfo.failed = failed
+accumulableUpdates.foreach { maps =>
+for (m <- maps) {
+taskInfo.accumulableValues += m.head
+}
+}
taskInfo
}

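Putting the two halves together, a stage's terminal values serialize as a single JSON object, and a task's updates serialize as an array of one-entry objects (one per update, so duplicates survive). A hedged round-trip sketch, reusing the dummy TaskInfo from the earlier sketch; JsonProtocol is private[spark], so this would run from code inside the org.apache.spark package, as JsonProtocolSuite does:

// Assumed shape of the new fields (values invented):
//   "Accumulated Values": {"acc1": "15", "acc2": "7"}
//   "Accumulable Updates": [{"acc1": "10"}, {"acc1": "15"}, {"acc2": "7"}]
import org.apache.spark.util.JsonProtocol

val json = JsonProtocol.taskInfoToJson(info)       // `info` from the TaskInfo sketch above
val restored = JsonProtocol.taskInfoFromJson(json)
assert(restored.accumulableValues == info.accumulableValues)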
31 changes: 23 additions & 8 deletions core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
@@ -261,6 +261,7 @@ class JsonProtocolSuite extends FunSuite {
(0 until info1.rddInfos.size).foreach { i =>
assertEquals(info1.rddInfos(i), info2.rddInfos(i))
}
+assert(info1.accumulatedValues === info2.accumulatedValues)
assert(info1.details === info2.details)
}

@@ -293,6 +294,7 @@
assert(info1.gettingResultTime === info2.gettingResultTime)
assert(info1.finishTime === info2.finishTime)
assert(info1.failed === info2.failed)
+assert(info1.accumulableValues === info2.accumulableValues)
}

private def assertEquals(metrics1: TaskMetrics, metrics2: TaskMetrics) {
@@ -477,11 +479,19 @@

private def makeStageInfo(a: Int, b: Int, c: Int, d: Long, e: Long) = {
val rddInfos = (0 until a % 5).map { i => makeRddInfo(a + i, b + i, c + i, d + i, e + i) }
-new StageInfo(a, "greetings", b, rddInfos, "details")
+val stageInfo = new StageInfo(a, "greetings", b, rddInfos, "details")
+stageInfo.accumulatedValues("acc1") = "val1"
+stageInfo.accumulatedValues("acc2") = "val2"
+stageInfo
}

private def makeTaskInfo(a: Long, b: Int, c: Int, d: Long, speculative: Boolean) = {
-new TaskInfo(a, b, c, d, "executor", "your kind sir", TaskLocality.NODE_LOCAL, speculative)
+val taskInfo = new TaskInfo(a, b, c, d, "executor", "your kind sir", TaskLocality.NODE_LOCAL,
+speculative)
+taskInfo.accumulableValues += (("acc1", "val1"))
+taskInfo.accumulableValues += (("acc1", "val1"))
+taskInfo.accumulableValues += (("acc2", "val2"))
+taskInfo
}

/**
@@ -538,7 +548,8 @@
private val stageSubmittedJsonString =
"""
{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":100,"Stage Name":
"greetings","Number of Tasks":200,"RDD Info":[],"Details":"details"},"Properties":
"greetings","Number of Tasks":200,"RDD Info":[],"Details":"details",
"AccumulatedValues":{"acc2":"val2","acc1":"val1"}},"Properties":
{"France":"Paris","Germany":"Berlin","Russia":"Moscow","Ukraine":"Kiev"}}
"""

@@ -548,23 +559,25 @@
"greetings","Number of Tasks":201,"RDD Info":[{"RDD ID":101,"Name":"mayor","Storage
Level":{"Use Disk":true,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,
"Replication":1},"Number of Partitions":201,"Number of Cached Partitions":301,
"Memory Size":401,"Tachyon Size":0,"Disk Size":501}],"Details":"details"}}
"Memory Size":401,"Tachyon Size":0,"Disk Size":501}],"Details":"details",
"AccumulatedValues":{"acc2":"val2","acc1":"val1"}}}
"""

private val taskStartJsonString =
"""
|{"Event":"SparkListenerTaskStart","Stage ID":111,"Task Info":{"Task ID":222,
|"Index":333,"Attempt":1,"Launch Time":444,"Executor ID":"executor","Host":"your kind sir",
|"Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,
|"Failed":false}}
|"Failed":false,"AccumulableUpdates":[{"acc1":"val1"},{"acc1":"val1"},{"acc2":"val2"}]}}
""".stripMargin

private val taskGettingResultJsonString =
"""
|{"Event":"SparkListenerTaskGettingResult","Task Info":
| {"Task ID":1000,"Index":2000,"Attempt":5,"Launch Time":3000,"Executor ID":"executor",
| "Host":"your kind sir","Locality":"NODE_LOCAL","Speculative":true,"Getting Result Time":0,
| "Finish Time":0,"Failed":false
| "Finish Time":0,"Failed":false,
| "AccumulableUpdates":[{"acc1":"val1"},{"acc1":"val1"},{"acc2":"val2"}]
| }
|}
""".stripMargin
@@ -576,7 +589,8 @@
|"Task Info":{
| "Task ID":123,"Index":234,"Attempt":67,"Launch Time":345,"Executor ID":"executor",
| "Host":"your kind sir","Locality":"NODE_LOCAL","Speculative":false,
| "Getting Result Time":0,"Finish Time":0,"Failed":false
| "Getting Result Time":0,"Finish Time":0,"Failed":false,
| "AccumulableUpdates":[{"acc1":"val1"},{"acc1":"val1"},{"acc2":"val2"}]
|},
|"Task Metrics":{
| "Host Name":"localhost","Executor Deserialize Time":300,"Executor Run Time":400,
@@ -616,7 +630,8 @@
|"Task Info":{
| "Task ID":123,"Index":234,"Attempt":67,"Launch Time":345,"Executor ID":"executor",
| "Host":"your kind sir","Locality":"NODE_LOCAL","Speculative":false,
| "Getting Result Time":0,"Finish Time":0,"Failed":false
| "Getting Result Time":0,"Finish Time":0,"Failed":false,
| "AccumulableUpdates":[{"acc1":"val1"},{"acc1":"val1"},{"acc2":"val2"}]
|},
|"Task Metrics":{
| "Host Name":"localhost","Executor Deserialize Time":300,"Executor Run Time":400,
