+ Did you specify the correct logging directory?
+ Please verify your setting of spark.history.fs.logDirectory
+ and whether you have the permissions to access it.
+ It is also possible that your application did not run to
+ completion or did not stop the SparkContext.
+
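If this message appears, event logging is usually either not enabled on the application side or it points at a different directory than the history server reads. A minimal sketch of the relevant settings (the directory below is an illustrative placeholder, not taken from this patch):

{% highlight scala %}
import org.apache.spark.{SparkConf, SparkContext}

// Application side: write event logs to a directory the history server can read.
val conf = new SparkConf()
  .setAppName("example")
  .set("spark.eventLog.enabled", "true")
  .set("spark.eventLog.dir", "hdfs:///spark-event-logs")  // placeholder path
val sc = new SparkContext(conf)
// ... run the job ...
sc.stop()  // stop the SparkContext so the log is marked as complete

// History server side (spark-defaults.conf), pointing at the same directory:
//   spark.history.fs.logDirectory  hdfs:///spark-event-logs
{% endhighlight %}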
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
new file mode 100644
index 0000000000000..ea2d187a0e8e4
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ui.jobs
+
+import scala.xml.{Node, NodeSeq}
+
+import javax.servlet.http.HttpServletRequest
+
+import org.apache.spark.JobExecutionStatus
+import org.apache.spark.ui.{WebUIPage, UIUtils}
+import org.apache.spark.ui.jobs.UIData.JobUIData
+
+/** Page showing list of all ongoing and recently finished jobs */
+private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") {
+  private val startTime: Option[Long] = parent.sc.map(_.startTime)
+  private val listener = parent.listener
+
+  private def jobsTable(jobs: Seq[JobUIData]): Seq[Node] = {
+    val someJobHasJobGroup = jobs.exists(_.jobGroup.isDefined)
+
+    val columns: Seq[Node] = {
+      <th>{if (someJobHasJobGroup) "Job Id (Job Group)" else "Job Id"}</th>
+      <th>Description</th>
+      <th>Submitted</th>
+      <th>Duration</th>
+      <th>Stages: Succeeded/Total</th>
+      <th>Tasks (for all stages): Succeeded/Total</th>
+    }
+
+    def makeRow(job: JobUIData): Seq[Node] = {
+      val lastStageInfo = listener.stageIdToInfo.get(job.stageIds.max)
+      val lastStageData = lastStageInfo.flatMap { s =>
+        listener.stageIdToData.get((s.stageId, s.attemptId))
+      }
+      val isComplete = job.status == JobExecutionStatus.SUCCEEDED
+      val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)")
+      val lastStageDescription = lastStageData.flatMap(_.description).getOrElse("")
+      val duration: Option[Long] = {
+        job.startTime.map { start =>
+          val end = job.endTime.getOrElse(System.currentTimeMillis())
+          end - start
+        }
+      }
+      val formattedDuration = duration.map(d => UIUtils.formatDuration(d)).getOrElse("Unknown")
+      val formattedSubmissionTime = job.startTime.map(UIUtils.formatDate).getOrElse("Unknown")
+      val detailUrl =
+        "%s/jobs/job?id=%s".format(UIUtils.prependBaseUri(parent.basePath), job.jobId)
+      <tr>
+        <td>
+          {job.jobId} {job.jobGroup.map(id => s"($id)").getOrElse("")}
+        </td>
+        <td>
+          <div>{lastStageDescription}</div>
+          <a href={detailUrl}>{lastStageName}</a>
+        </td>
+        <td>
+          {formattedSubmissionTime}
+        </td>
+        <td>{formattedDuration}</td>
+        <td>
+          {job.completedStageIndices.size}/{job.stageIds.size - job.numSkippedStages}
+          {if (job.numFailedStages > 0) s"(${job.numFailedStages} failed)"}
+          {if (job.numSkippedStages > 0) s"(${job.numSkippedStages} skipped)"}
+        </td>
+        <td>
+          {UIUtils.makeProgressBar(started = job.numActiveTasks, completed = job.numCompletedTasks,
+          failed = job.numFailedTasks, skipped = job.numSkippedTasks,
+          total = job.numTasks - job.numSkippedTasks)}
+        </td>
+      </tr>
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
new file mode 100644
index 0000000000000..77d36209c6048
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ui.jobs
+
+import scala.collection.mutable
+import scala.xml.{NodeSeq, Node}
+
+import javax.servlet.http.HttpServletRequest
+
+import org.apache.spark.JobExecutionStatus
+import org.apache.spark.scheduler.StageInfo
+import org.apache.spark.ui.{UIUtils, WebUIPage}
+
+/** Page showing statistics and stage list for a given job */
+private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") {
+  private val listener = parent.listener
+
+  def render(request: HttpServletRequest): Seq[Node] = {
+    listener.synchronized {
+      val jobId = request.getParameter("id").toInt
+      val jobDataOption = listener.jobIdToData.get(jobId)
+      if (jobDataOption.isEmpty) {
+        val content =
@@ -73,25 +72,11 @@ private[ui] class StageTableBase(
-
-In the following example we use the `mapReduceTriplets` operator to compute the average age of the
-more senior followers of each user.
+
+
+In addition, [`aggregateMessages`][Graph.aggregateMessages] takes an optional
+`tripletFields` argument which indicates what data is accessed in the [`EdgeContext`][EdgeContext]
+(i.e., the source vertex attribute but not the destination vertex attribute).
+The possible options for `tripletFields` are defined in [`TripletFields`][TripletFields] and
+the default value is [`TripletFields.All`][TripletFields.All], which indicates that the user
+defined `sendMsg` function may access any of the fields in the [`EdgeContext`][EdgeContext].
+The `tripletFields` argument can be used to notify GraphX that only part of the
+[`EdgeContext`][EdgeContext] will be needed, allowing GraphX to select an optimized join strategy.
+For example, if we are computing the average age of the followers of each user we would only
+require the source field, and so we would use [`TripletFields.Src`][TripletFields.Src] to indicate
+that we only require the source field.
+
+> In earlier versions of GraphX we used bytecode inspection to infer the
+[`TripletFields`][TripletFields]; however, we found bytecode inspection to be
+slightly unreliable and instead opted for more explicit user control.
+
+In the following example we use the [`aggregateMessages`][Graph.aggregateMessages] operator to
+compute the average age of the more senior followers of each user.

{% highlight scala %}
// Import random graph generation library
@@ -622,14 +608,11 @@ import org.apache.spark.graphx.util.GraphGenerators
val graph: Graph[Double, Int] =
  GraphGenerators.logNormalGraph(sc, numVertices = 100).mapVertices( (id, _) => id.toDouble )
// Compute the number of older followers and their total age
-val olderFollowers: VertexRDD[(Int, Double)] = graph.mapReduceTriplets[(Int, Double)](
+val olderFollowers: VertexRDD[(Int, Double)] = graph.aggregateMessages[(Int, Double)](
  triplet => { // Map Function
    if (triplet.srcAttr > triplet.dstAttr) {
      // Send message to destination vertex containing counter and age
-      Iterator((triplet.dstId, (1, triplet.srcAttr)))
-    } else {
-      // Don't send a message for this triplet
-      Iterator.empty
+      triplet.sendToDst(1, triplet.srcAttr)
    }
  },
  // Add counter and age
@@ -642,10 +625,57 @@ val avgAgeOfOlderFollowers: VertexRDD[Double] =
avgAgeOfOlderFollowers.collect.foreach(println(_))
{% endhighlight %}

-> Note that the `mapReduceTriplets` operation performs optimally when the messages (and the sums of
-> messages) are constant sized (e.g., floats and addition instead of lists and concatenation). More
-> precisely, the result of `mapReduceTriplets` should ideally be sub-linear in the degree of each
-> vertex.
+> The `aggregateMessages` operation performs optimally when the messages (and the sums of
+> messages) are constant sized (e.g., floats and addition instead of lists and concatenation).
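As a sketch of the `tripletFields` hint described above (reusing the `graph` from this example), the follower-age sum only reads the source attribute, so the hint can be passed explicitly:

{% highlight scala %}
import org.apache.spark.graphx.{TripletFields, VertexRDD}

// Sum the ages of each user's followers, touching only the source attribute.
// Passing TripletFields.Src tells GraphX it does not need to ship destination attributes.
val totalAgeOfFollowers: VertexRDD[Double] = graph.aggregateMessages[Double](
  ctx => ctx.sendToDst(ctx.srcAttr),  // only srcAttr is accessed
  _ + _,                              // merge messages by summing
  TripletFields.Src)
{% endhighlight %}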
+
+
+### Map Reduce Triplets Transition Guide (Legacy)
+
+In earlier versions of GraphX neighborhood aggregation was accomplished using the
+[`mapReduceTriplets`][Graph.mapReduceTriplets] operator:
+
+{% highlight scala %}
+class Graph[VD, ED] {
+  def mapReduceTriplets[Msg](
+      map: EdgeTriplet[VD, ED] => Iterator[(VertexId, Msg)],
+      reduce: (Msg, Msg) => Msg)
+    : VertexRDD[Msg]
+}
+{% endhighlight %}
+
+The [`mapReduceTriplets`][Graph.mapReduceTriplets] operator takes a user defined map function which
+is applied to each triplet and can yield *messages* which are aggregated using the user defined
+`reduce` function.
+However, we found the use of the returned iterator to be expensive and it inhibited our ability to
+apply additional optimizations (e.g., local vertex renumbering).
+In [`aggregateMessages`][Graph.aggregateMessages] we introduced the EdgeContext which exposes the
+triplet fields and also functions to explicitly send messages to the source and destination
+vertices.
+Furthermore we removed bytecode inspection and instead require the user to indicate what fields
+in the triplet are actually required.
+
+The following code block using `mapReduceTriplets`:
+
+{% highlight scala %}
+val graph: Graph[Int, Float] = ...
+def msgFun(triplet: EdgeTriplet[Int, Float]): Iterator[(VertexId, String)] = {
+  Iterator((triplet.dstId, "Hi"))
+}
+def reduceFun(a: String, b: String): String = a + " " + b
+val result = graph.mapReduceTriplets[String](msgFun, reduceFun)
+{% endhighlight %}
+
+can be rewritten using `aggregateMessages` as:
+
+{% highlight scala %}
+val graph: Graph[Int, Float] = ...
+def msgFun(triplet: EdgeContext[Int, Float, String]) {
+  triplet.sendToDst("Hi")
+}
+def reduceFun(a: String, b: String): String = a + " " + b
+val result = graph.aggregateMessages[String](msgFun, reduceFun)
+{% endhighlight %}
+

### Computing Degree Information
@@ -673,10 +703,6 @@ attributes at each vertex. This can be easily accomplished using the
[`collectNeighborIds`][GraphOps.collectNeighborIds] and the
[`collectNeighbors`][GraphOps.collectNeighbors] operators.

-[GraphOps.collectNeighborIds]: api/scala/index.html#org.apache.spark.graphx.GraphOps@collectNeighborIds(EdgeDirection):VertexRDD[Array[VertexId]]
-[GraphOps.collectNeighbors]: api/scala/index.html#org.apache.spark.graphx.GraphOps@collectNeighbors(EdgeDirection):VertexRDD[Array[(VertexId,VD)]]
-
-
{% highlight scala %}
class GraphOps[VD, ED] {
  def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]]
@@ -684,36 +710,34 @@ class GraphOps[VD, ED] {
}
{% endhighlight %}

-> Note that these operators can be quite costly as they duplicate information and require
+> These operators can be quite costly as they duplicate information and require
> substantial communication. If possible try expressing the same computation using the
-> `mapReduceTriplets` operator directly.
+> [`aggregateMessages`][Graph.aggregateMessages] operator directly.
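As a small illustration of the neighborhood collection operators above (a sketch, assuming the `graph` from the earlier examples):

{% highlight scala %}
import org.apache.spark.graphx.{EdgeDirection, VertexId, VertexRDD}

// Collect the ids of each vertex's in-neighbors; note the communication cost on large graphs.
val inNbrs: VertexRDD[Array[VertexId]] = graph.collectNeighborIds(EdgeDirection.In)
inNbrs.take(5).foreach { case (id, nbrs) => println(s"$id <- ${nbrs.mkString(", ")}") }
{% endhighlight %}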

## Caching and Uncaching

In Spark, RDDs are not persisted in memory by default. To avoid recomputation, they must be
explicitly cached when using them multiple times (see the [Spark Programming
Guide][RDD Persistence]). Graphs in GraphX behave the same way. **When using a graph multiple
times, make sure to call [`Graph.cache()`][Graph.cache] on it first.**

-[RDD Persistence]: programming-guide.html#rdd-persistence
-[Graph.cache]: api/scala/index.html#org.apache.spark.graphx.Graph@cache():Graph[VD,ED]

In iterative computations, *uncaching* may also be necessary for best performance. By default,
cached RDDs and graphs will remain in memory until memory pressure forces them to be evicted in
LRU order. For iterative computation, intermediate results from previous iterations will fill up
the cache. Though they will eventually be evicted, the unnecessary data stored in memory will slow
down garbage collection. It would be more efficient to uncache intermediate results as soon as
they are no longer necessary. This involves materializing (caching and forcing) a graph or RDD
every iteration, uncaching all other datasets, and only using the materialized dataset in future
iterations. However, because graphs are composed of multiple RDDs, it can be difficult to
unpersist them correctly. **For iterative computation we recommend using the Pregel API, which
correctly unpersists intermediate results.**
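The loop below is a minimal sketch of the manual materialize-then-unpersist pattern described above; `initialGraph`, `step`, and `numIterations` are hypothetical placeholders, and the Pregel API performs this bookkeeping for you:

{% highlight scala %}
var g = initialGraph.cache()
for (i <- 1 to numIterations) {
  val next = step(g).cache()            // materialize the new graph
  next.vertices.count()                 // force evaluation before dropping the old one
  g.unpersistVertices(blocking = false)
  g.edges.unpersist(blocking = false)
  g = next
}
{% endhighlight %}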

-# Pregel API
-Graphs are inherently recursive data-structures as properties of vertices depend on properties of
+# Pregel API
+
+Graphs are inherently recursive data structures as properties of vertices depend on properties of
their neighbors which in turn depend on properties of *their* neighbors. As a consequence many
important graph algorithms iteratively recompute the properties of each vertex until a fixed-point
condition is reached. A range of graph-parallel abstractions have been proposed
-to express these iterative algorithms. GraphX exposes a Pregel-like operator which is a fusion of
-the widely used Pregel and GraphLab abstractions.
+to express these iterative algorithms. GraphX exposes a variant of the Pregel API.

-At a high-level the Pregel operator in GraphX is a bulk-synchronous parallel messaging abstraction
-*constrained to the topology of the graph*. The Pregel operator executes in a series of super-steps
-in which vertices receive the *sum* of their inbound messages from the previous super-step, compute
+At a high level the Pregel operator in GraphX is a bulk-synchronous parallel messaging abstraction
+*constrained to the topology of the graph*. The Pregel operator executes in a series of super steps
+in which vertices receive the *sum* of their inbound messages from the previous super step, compute
a new value for the vertex property, and then send messages to neighboring vertices in the next
-super-step. Unlike Pregel and instead more like GraphLab messages are computed in parallel as a
+super step. Unlike Pregel, messages are computed in parallel as a
function of the edge triplet and the message computation has access to both the source and
-destination vertex attributes. Vertices that do not receive a message are skipped within a super-
+destination vertex attributes. Vertices that do not receive a message are skipped within a super
step. The Pregel operator terminates iteration and returns the final graph when there are no
messages remaining.
@@ -724,8 +748,6 @@ messages remaining.

The following is the type signature of the [Pregel operator][GraphOps.pregel] as well as a *sketch*
of its implementation (note calls to graph.cache have been removed):

-[GraphOps.pregel]: api/scala/index.html#org.apache.spark.graphx.GraphOps@pregel[A](A,Int,EdgeDirection)((VertexId,VD,A)⇒VD,(EdgeTriplet[VD,ED])⇒Iterator[(VertexId,A)],(A,A)⇒A)(ClassTag[A]):Graph[VD,ED]
-
{% highlight scala %}
class GraphOps[VD, ED] {
  def pregel[A]
@@ -795,9 +817,10 @@ val sssp = initialGraph.pregel(Double.PositiveInfinity)(
println(sssp.vertices.collect.mkString("\n"))
{% endhighlight %}

-# Graph Builders
+# Graph Builders
+
GraphX provides several ways of building a graph from a collection of vertices and edges in an RDD
or on disk. None of the graph builders repartitions the graph's edges by default; instead, edges
are left in their default partitions (such as their original blocks in HDFS).
[`Graph.groupEdges`][Graph.groupEdges] requires the graph to be repartitioned because it assumes
identical edges will be colocated on the same partition, so you must call
[`Graph.partitionBy`][Graph.partitionBy] before calling `groupEdges`.

{% highlight scala %}
@@ -848,18 +871,12 @@ object Graph {

[`Graph.fromEdgeTuples`][Graph.fromEdgeTuples] allows creating a graph from only an RDD of edge
tuples, assigning the edges the value 1, and automatically creating any vertices mentioned by
edges and assigning them the default value. It also supports deduplicating the edges; to
deduplicate, pass `Some` of a [`PartitionStrategy`][PartitionStrategy] as the `uniqueEdges`
parameter (for example, `uniqueEdges = Some(PartitionStrategy.RandomVertexCut)`). A partition
strategy is necessary to colocate identical edges on the same partition so they can be
deduplicated.

-[PartitionStrategy]: api/scala/index.html#org.apache.spark.graphx.PartitionStrategy$
-
-[GraphLoader.edgeListFile]: api/scala/index.html#org.apache.spark.graphx.GraphLoader$@edgeListFile(SparkContext,String,Boolean,Int):Graph[Int,Int]
-[Graph.apply]: api/scala/index.html#org.apache.spark.graphx.Graph$@apply[VD,ED](RDD[(VertexId,VD)],RDD[Edge[ED]],VD)(ClassTag[VD],ClassTag[ED]):Graph[VD,ED]
-[Graph.fromEdgeTuples]: api/scala/index.html#org.apache.spark.graphx.Graph$@fromEdgeTuples[VD](RDD[(VertexId,VertexId)],VD,Option[PartitionStrategy])(ClassTag[VD]):Graph[VD,Int]
-[Graph.fromEdges]: api/scala/index.html#org.apache.spark.graphx.Graph$@fromEdges[VD,ED](RDD[Edge[ED]],VD)(ClassTag[VD],ClassTag[ED]):Graph[VD,ED]
+

# Vertex and Edge RDDs
-
GraphX exposes `RDD` views of the vertices and edges stored within the graph. However, because
-GraphX maintains the vertices and edges in optimized data-structures and these data-structures
+GraphX maintains the vertices and edges in optimized data structures and these data structures
provide additional functionality, the vertices and edges are returned as `VertexRDD` and `EdgeRDD`
respectively. In this section we review some of the additional useful functionality in these types.

@@ -870,7 +887,7 @@ The `VertexRDD[A]` extends `RDD[(VertexID, A)]` and adds the additional constraint that each
attribute of type `A`. Internally, this is achieved by storing the vertex attributes in a reusable
hash-map data-structure. As a consequence if two `VertexRDD`s are derived from the same base
`VertexRDD` (e.g., by `filter` or `mapValues`) they can be joined in constant time without hash
-evaluations. To leverage this indexed data-structure, the `VertexRDD` exposes the following
+evaluations. To leverage this indexed data structure, the `VertexRDD` exposes the following
additional functionality:

{% highlight scala %}
@@ -893,7 +910,7 @@ class VertexRDD[VD] extends RDD[(VertexID, VD)] {

Notice, for example, how the `filter` operator returns a `VertexRDD`. Filter is actually
implemented using a `BitSet`, thereby reusing the index and preserving the ability to do fast
joins with other `VertexRDD`s. Likewise, the `mapValues` operator does not allow the `map`
function to
-change the `VertexID` thereby enabling the same `HashMap` data-structures to be reused. Both the
+change the `VertexID` thereby enabling the same `HashMap` data structures to be reused. Both the
`leftJoin` and `innerJoin` are able to identify when joining two `VertexRDD`s derived from the
same `HashMap` and implement the join by linear scan rather than costly point lookups.

@@ -916,21 +933,19 @@ val setC: VertexRDD[Double] = setA.innerJoin(setB)((id, a, b) => a + b)

## EdgeRDDs

-The `EdgeRDD[ED, VD]`, which extends `RDD[Edge[ED]]` organizes the edges in blocks partitioned using one
+The `EdgeRDD[ED]`, which extends `RDD[Edge[ED]]`, organizes the edges in blocks partitioned using one
of the various partitioning strategies defined in [`PartitionStrategy`][PartitionStrategy]. Within
each partition, edge attributes and adjacency structure are stored separately, enabling maximum
reuse when changing attribute values.

-[PartitionStrategy]: api/scala/index.html#org.apache.spark.graphx.PartitionStrategy
-
The three additional functions exposed by the `EdgeRDD` are:

{% highlight scala %}
// Transform the edge attributes while preserving the structure
-def mapValues[ED2](f: Edge[ED] => ED2): EdgeRDD[ED2, VD]
+def mapValues[ED2](f: Edge[ED] => ED2): EdgeRDD[ED2]
// Reverse the edges reusing both attributes and structure
-def reverse: EdgeRDD[ED, VD]
+def reverse: EdgeRDD[ED]
// Join two `EdgeRDD`s partitioned using the same partitioning strategy.
-def innerJoin[ED2, ED3](other: EdgeRDD[ED2, VD])(f: (VertexId, VertexId, ED, ED2) => ED3): EdgeRDD[ED3, VD]
+def innerJoin[ED2, ED3](other: EdgeRDD[ED2])(f: (VertexId, VertexId, ED, ED2) => ED3): EdgeRDD[ED3]
{% endhighlight %}

In most applications we have found that operations on the `EdgeRDD` are accomplished through the
@@ -960,7 +975,6 @@ the [`Graph.partitionBy`][Graph.partitionBy] operator. The default partitioning
the initial partitioning of the edges as provided on graph construction. However, users can easily
switch to 2D-partitioning or other heuristics included in GraphX.

-[Graph.partitionBy]: api/scala/index.html#org.apache.spark.graphx.Graph$@partitionBy(partitionStrategy:org.apache.spark.graphx.PartitionStrategy):org.apache.spark.graphx.Graph[VD,ED]
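For example, a short sketch of switching strategies on an existing `graph`; the strategy name is one of the options defined in `PartitionStrategy`:

{% highlight scala %}
// Repartition edges with the 2D strategy before operations that expect co-partitioned edges,
// such as groupEdges or triangleCount.
val partitionedGraph = graph.partitionBy(PartitionStrategy.EdgePartition2D)
{% endhighlight %}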
-
-Note that `mapReduceTriplets` takes an additional optional `activeSet`
-(not shown above; see API docs for details) which restricts the map phase to edges adjacent to the
-vertices in the provided `VertexRDD`:
-
-{% highlight scala %}
-  activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None
-{% endhighlight %}
-
-The `EdgeDirection` specifies which edges adjacent to the vertex set are included in the map
-phase. If the direction is `In`, then the user defined `map` function will
-only be run on edges with the destination vertex in the active set. If the direction is
-`Out`, then the `map` function will only be run on edges originating from
-vertices in the active set. If the direction is `Either`, then the `map`
-function will be run on edges with either vertex in the active set. If the direction is
-`Both`, then the `map` function will only be run on edges with both vertices
-in the active set. The active set must be derived from the set of vertices in the graph.
-Restricting computation to triplets adjacent to a subset of the vertices is often necessary in
-incremental iterative computation and is a key part of the GraphX implementation of Pregel.
+# Graph Algorithms
+
GraphX includes a set of graph algorithms to simplify analytics tasks. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via [`GraphOps`][GraphOps]. This section describes the algorithms and how they are used.
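For instance, the following sketch (assuming a `graph` value that has already been constructed) calls two of the algorithms directly as graph methods:

{% highlight scala %}
// Each call below is a GraphOps method backed by org.apache.spark.graphx.lib.
val ranks = graph.pageRank(tol = 0.0001).vertices         // dynamic PageRank
val components = graph.connectedComponents().vertices     // lowest vertex id per component
{% endhighlight %}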
-## PageRank
+## PageRank
+
PageRank measures the importance of each vertex in a graph, assuming an edge from *u* to *v* represents an endorsement of *v*'s importance by *u*. For example, if a Twitter user is followed by many others, the user will be ranked highly.
GraphX comes with static and dynamic implementations of PageRank as methods on the [`PageRank` object][PageRank]. Static PageRank runs for a fixed number of iterations, while dynamic PageRank runs until the ranks converge (i.e., stop changing by more than a specified tolerance). [`GraphOps`][GraphOps] allows calling these algorithms directly as methods on `Graph`.
GraphX also includes an example social network dataset that we can run PageRank on. A set of users is given in `graphx/data/users.txt`, and a set of relationships between users is given in `graphx/data/followers.txt`. We compute the PageRank of each user as follows:
-[PageRank]: api/scala/index.html#org.apache.spark.graphx.lib.PageRank$
-
{% highlight scala %}
// Load the edges as a graph
val graph = GraphLoader.edgeListFile(sc, "graphx/data/followers.txt")
@@ -1014,8 +1028,6 @@ println(ranksByUsername.collect().mkString("\n"))
The connected components algorithm labels each connected component of the graph with the ID of its lowest-numbered vertex. For example, in a social network, connected components can approximate clusters. GraphX contains an implementation of the algorithm in the [`ConnectedComponents` object][ConnectedComponents], and we compute the connected components of the example social network dataset from the [PageRank section](#pagerank) as follows:
-[ConnectedComponents]: api/scala/index.html#org.apache.spark.graphx.lib.ConnectedComponents$
-
{% highlight scala %}
// Load the graph as in the PageRank example
val graph = GraphLoader.edgeListFile(sc, "graphx/data/followers.txt")
@@ -1037,9 +1049,6 @@ println(ccByUsername.collect().mkString("\n"))
A vertex is part of a triangle when it has two adjacent vertices with an edge between them. GraphX implements a triangle counting algorithm in the [`TriangleCount` object][TriangleCount] that determines the number of triangles passing through each vertex, providing a measure of clustering. We compute the triangle count of the social network dataset from the [PageRank section](#pagerank). *Note that `TriangleCount` requires the edges to be in canonical orientation (`srcId < dstId`) and the graph to be partitioned using [`Graph.partitionBy`][Graph.partitionBy].*
-[TriangleCount]: api/scala/index.html#org.apache.spark.graphx.lib.TriangleCount$
-[Graph.partitionBy]: api/scala/index.html#org.apache.spark.graphx.Graph@partitionBy(PartitionStrategy):Graph[VD,ED]
-
{% highlight scala %}
// Load the edges in canonical order and partition the graph for triangle count
val graph = GraphLoader.edgeListFile(sc, "graphx/data/followers.txt", true).partitionBy(PartitionStrategy.RandomVertexCut)
diff --git a/docs/img/data_parallel_vs_graph_parallel.png b/docs/img/data_parallel_vs_graph_parallel.png
deleted file mode 100644
index d3918f01d8f3b..0000000000000
Binary files a/docs/img/data_parallel_vs_graph_parallel.png and /dev/null differ
diff --git a/docs/img/graph_analytics_pipeline.png b/docs/img/graph_analytics_pipeline.png
deleted file mode 100644
index 6d606e01894ae..0000000000000
Binary files a/docs/img/graph_analytics_pipeline.png and /dev/null differ
diff --git a/docs/img/tables_and_graphs.png b/docs/img/tables_and_graphs.png
deleted file mode 100644
index ec37bb45a62f0..0000000000000
Binary files a/docs/img/tables_and_graphs.png and /dev/null differ
diff --git a/docs/monitoring.md b/docs/monitoring.md
index e3f81a76acdbb..f32cdef240d31 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -79,7 +79,7 @@ follows: