From 51bbbe46a0d2cb3249164d49706adb523df97533 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Tue, 28 Oct 2014 15:20:01 -0700 Subject: [PATCH 1/6] Changes to make SparkR work with YARN --- pkg/src/Makefile | 11 +++- pkg/src/build.sbt | 9 +++ pkg/src/pom.xml | 164 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 181 insertions(+), 3 deletions(-) diff --git a/pkg/src/Makefile b/pkg/src/Makefile index 6332fc3e00dbf..77385d1cd6f16 100644 --- a/pkg/src/Makefile +++ b/pkg/src/Makefile @@ -15,6 +15,7 @@ RESOURCE_FILES := $(wildcard $(RESOURCE_DIR)/*) SPARK_HADOOP_VERSION ?= 1.0.4 SPARK_VERSION ?= 1.1.0 +SPARK_YARN_VERSION ?= 2.4.0 ifdef USE_MAVEN TARGET_NAME := $(MAVEN_TARGET_NAME) @@ -24,6 +25,14 @@ else BUILD_TOOL := sbt/sbt endif +ifdef USE_YARN + MAVEN_YARN_FLAG := "-Pyarn" + SBT_YARN_FLAG := "yarn" +else + MAVEN_YARN_FLAG := "" + SBT_YARN_FLAG := "" +endif + all: $(TARGET_NAME) $(SBT_TARGET_NAME): build.sbt $(SCALA_FILES) $(RESOURCE_FILES) @@ -31,7 +40,7 @@ $(SBT_TARGET_NAME): build.sbt $(SCALA_FILES) $(RESOURCE_FILES) cp -f $(SBT_TARGET_NAME) ../inst/ $(MAVEN_TARGET_NAME): pom.xml $(SCALA_FILES) $(RESOURCE_FILES) - mvn -Dhadoop.version=$(SPARK_HADOOP_VERSION) -Dspark.version=$(SPARK_VERSION) -DskipTests clean package shade:shade + mvn -Dhadoop.version=$(SPARK_HADOOP_VERSION) -Dspark.version=$(SPARK_VERSION) -DskipTests $(MAVEN_YARN_FLAG) -Dyarn.version=$(SPARK_YARN_VERSION) clean package shade:shade cp -f $(MAVEN_TARGET_NAME) ../inst/$(JAR_NAME) clean: diff --git a/pkg/src/build.sbt b/pkg/src/build.sbt index d3302d9f6b4f9..e664baed2294b 100644 --- a/pkg/src/build.sbt +++ b/pkg/src/build.sbt @@ -25,6 +25,15 @@ libraryDependencies ++= Seq( val excludeNetty = ExclusionRule(organization = "org.jboss.netty") val excludeAsm = ExclusionRule(organization = "asm") val excludeSnappy = ExclusionRule(organization = "org.xerial.snappy") + val sbtYarnFlag = scala.util.Properties.envOrElse("SBT_YARN_FLAG", "") + if (sbtYarnFlag == "yarn") { + val defaultYarnVersion = "2.4.0" + val yarnVersion = scala.util.Properties.envOrElse("SPARK_YARN_VERSION", defaultYarnVersion) + libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-api" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) + libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-common" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) + libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-server-web-proxy" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) + libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-client" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) + } val defaultHadoopVersion = "1.0.4" val defaultSparkVersion = "1.1.0" val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", defaultHadoopVersion) diff --git a/pkg/src/pom.xml b/pkg/src/pom.xml index 15874db6bc7c2..fd247a741fb64 100644 --- a/pkg/src/pom.xml +++ b/pkg/src/pom.xml @@ -6,6 +6,51 @@ jar 0.1 + + central + + Maven Repository + https://repo1.maven.org/maven2 + + true + + + false + + + + apache-repo + Apache Repository + https://repository.apache.org/content/repositories/releases + + true + + + false + + + + jboss-repo + JBoss Repository + https://repository.jboss.org/nexus/content/repositories/releases + + true + + + false + + + + cloudera-repo + Cloudera Repository + https://repository.cloudera.com/artifactory/cloudera-repos + + true + + + false + + Spray.cc repository http://repo.spray.cc @@ -73,6 +118,7 @@ 2.10.3 + UTF-8 @@ -82,8 +128,6 @@ 2.10.3 2.10 - 1.0.4 - 64m 512m @@ -298,4 +342,120 @@ + + + yarn + + + org.apache.hadoop + hadoop-yarn-api + ${yarn.version} + + + javax.servlet + servlet-api + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + commons-logging + commons-logging + + + + + org.apache.hadoop + hadoop-yarn-common + ${yarn.version} + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + javax.servlet + servlet-api + + + commons-logging + commons-logging + + + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + ${yarn.version} + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + javax.servlet + servlet-api + + + commons-logging + commons-logging + + + + + org.apache.hadoop + hadoop-yarn-client + ${yarn.version} + + + asm + asm + + + org.ow2.asm + asm + + + org.jboss.netty + netty + + + javax.servlet + servlet-api + + + commons-logging + commons-logging + + + + + + + From af5fe77b9c77748b3ed9b8d43ab1a31cc71be8ab Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Tue, 28 Oct 2014 23:26:57 -0700 Subject: [PATCH 2/6] Fix SBT build, add dependency tree plugin --- pkg/src/build.sbt | 29 +++++++++++++++++++---------- pkg/src/project/plugins.sbt | 2 ++ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pkg/src/build.sbt b/pkg/src/build.sbt index e664baed2294b..800b7ba35c3c2 100644 --- a/pkg/src/build.sbt +++ b/pkg/src/build.sbt @@ -6,6 +6,8 @@ import AssemblyKeys._ assemblySettings +net.virtualvoid.sbt.graph.Plugin.graphSettings + name := "sparkr" version := "0.1" @@ -26,20 +28,27 @@ libraryDependencies ++= Seq( val excludeAsm = ExclusionRule(organization = "asm") val excludeSnappy = ExclusionRule(organization = "org.xerial.snappy") val sbtYarnFlag = scala.util.Properties.envOrElse("SBT_YARN_FLAG", "") - if (sbtYarnFlag == "yarn") { - val defaultYarnVersion = "2.4.0" - val yarnVersion = scala.util.Properties.envOrElse("SPARK_YARN_VERSION", defaultYarnVersion) - libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-api" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) - libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-common" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) - libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-server-web-proxy" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) - libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-client" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) - } val defaultHadoopVersion = "1.0.4" val defaultSparkVersion = "1.1.0" val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", defaultHadoopVersion) val sparkVersion = scala.util.Properties.envOrElse("SPARK_VERSION", defaultSparkVersion) - libraryDependencies += "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) - libraryDependencies += "org.apache.spark" % "spark-core_2.10" % sparkVersion + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), + "org.apache.spark" % "spark-core_2.10" % sparkVersion + ) ++ (if (sbtYarnFlag == "yarn") { + val defaultYarnVersion = "2.4.0" + val yarnVersion = scala.util.Properties.envOrElse("SPARK_YARN_VERSION", defaultYarnVersion) + Seq( + "org.apache.hadoop" % "hadoop-yarn-api" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), + "org.apache.hadoop" % "hadoop-yarn-common" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), + "org.apache.hadoop" % "hadoop-yarn-server-web-proxy" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), + "org.apache.hadoop" % "hadoop-yarn-client" % yarnVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), + "org.apache.spark" % "spark-yarn_2.10" % sparkVersion + ) + } else { + None.toSeq + } + ) } resolvers ++= Seq( diff --git a/pkg/src/project/plugins.sbt b/pkg/src/project/plugins.sbt index 6d31a65f305a3..e5f9f66607e5a 100644 --- a/pkg/src/project/plugins.sbt +++ b/pkg/src/project/plugins.sbt @@ -1,3 +1,5 @@ resolvers += "Sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots/" addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.1") + +addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4") From bf0797f7e56a5adc4c85e919c92237d3c16d2e77 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Tue, 28 Oct 2014 23:27:14 -0700 Subject: [PATCH 3/6] Add dependency on spark yarn module --- pkg/src/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/src/pom.xml b/pkg/src/pom.xml index fd247a741fb64..f1ff59c15b34f 100644 --- a/pkg/src/pom.xml +++ b/pkg/src/pom.xml @@ -454,6 +454,11 @@ + + org.apache.spark + spark-yarn_2.10 + ${spark.version} + From 86b04ebf5ae4c7329d3c4ca6ca33e20ec3b4d8d7 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Tue, 28 Oct 2014 23:27:33 -0700 Subject: [PATCH 4/6] Don't use quotes around yarn --- pkg/src/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/src/Makefile b/pkg/src/Makefile index 77385d1cd6f16..6f36febdcea34 100644 --- a/pkg/src/Makefile +++ b/pkg/src/Makefile @@ -27,10 +27,10 @@ endif ifdef USE_YARN MAVEN_YARN_FLAG := "-Pyarn" - SBT_YARN_FLAG := "yarn" + SBT_YARN_FLAG := yarn else MAVEN_YARN_FLAG := "" - SBT_YARN_FLAG := "" + SBT_YARN_FLAG := endif all: $(TARGET_NAME) From a5459c5a4e9baceddfa4d0ff2e1d461ef28c22f4 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Tue, 28 Oct 2014 23:46:39 -0700 Subject: [PATCH 5/6] Consolidate yarn flags --- pkg/src/Makefile | 2 -- pkg/src/build.sbt | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pkg/src/Makefile b/pkg/src/Makefile index 6f36febdcea34..69797486a7b06 100644 --- a/pkg/src/Makefile +++ b/pkg/src/Makefile @@ -27,10 +27,8 @@ endif ifdef USE_YARN MAVEN_YARN_FLAG := "-Pyarn" - SBT_YARN_FLAG := yarn else MAVEN_YARN_FLAG := "" - SBT_YARN_FLAG := endif all: $(TARGET_NAME) diff --git a/pkg/src/build.sbt b/pkg/src/build.sbt index 800b7ba35c3c2..8c2196624d1dd 100644 --- a/pkg/src/build.sbt +++ b/pkg/src/build.sbt @@ -27,7 +27,7 @@ libraryDependencies ++= Seq( val excludeNetty = ExclusionRule(organization = "org.jboss.netty") val excludeAsm = ExclusionRule(organization = "asm") val excludeSnappy = ExclusionRule(organization = "org.xerial.snappy") - val sbtYarnFlag = scala.util.Properties.envOrElse("SBT_YARN_FLAG", "") + val sbtYarnFlag = scala.util.Properties.envOrElse("USE_YARN", "") val defaultHadoopVersion = "1.0.4" val defaultSparkVersion = "1.1.0" val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", defaultHadoopVersion) @@ -35,7 +35,7 @@ libraryDependencies ++= Seq( libraryDependencies ++= Seq( "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), "org.apache.spark" % "spark-core_2.10" % sparkVersion - ) ++ (if (sbtYarnFlag == "yarn") { + ) ++ (if (sbtYarnFlag != "") { val defaultYarnVersion = "2.4.0" val yarnVersion = scala.util.Properties.envOrElse("SPARK_YARN_VERSION", defaultYarnVersion) Seq( From 5951d3bff61e9bf3ba36de22991e593048cf7156 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman Date: Wed, 29 Oct 2014 14:12:15 -0700 Subject: [PATCH 6/6] Remove SBT plugin --- pkg/src/build.sbt | 2 -- pkg/src/project/plugins.sbt | 2 -- 2 files changed, 4 deletions(-) diff --git a/pkg/src/build.sbt b/pkg/src/build.sbt index 8c2196624d1dd..4ab8dd0c6314f 100644 --- a/pkg/src/build.sbt +++ b/pkg/src/build.sbt @@ -6,8 +6,6 @@ import AssemblyKeys._ assemblySettings -net.virtualvoid.sbt.graph.Plugin.graphSettings - name := "sparkr" version := "0.1" diff --git a/pkg/src/project/plugins.sbt b/pkg/src/project/plugins.sbt index e5f9f66607e5a..6d31a65f305a3 100644 --- a/pkg/src/project/plugins.sbt +++ b/pkg/src/project/plugins.sbt @@ -1,5 +1,3 @@ resolvers += "Sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots/" addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.1") - -addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4")