From c33dc2c1b2d2fc06a69ecfd136576af85bb56226 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 24 Feb 2014 15:50:26 +0530 Subject: [PATCH 1/9] SPARK-964, Java 8 API Support. This patch adds a few methods to java API such that it is possible to pass lambdas instead of Anonymous classes and also java 6/7 api users can use the same apis by passing anonymous classes. To achieve this a few older API methods are removed and replaced with their ToPair/ToDouble versions. 1) all anonymous classes extending scala Function is replaced by interfaces. 2) Adds optional to run java 8 tests Please refer to PR comments for more details. --- .../java/function/DoubleFlatMapFunction.java} | 11 +- .../api/java/function/DoubleFunction.java} | 11 +- .../api/java/function/FlatMapFunction.java | 24 + .../api/java/function/FlatMapFunction2.java | 24 + .../spark/api/java/function/Function.java} | 12 +- .../spark/api/java/function/Function2.java | 24 + .../spark/api/java/function/Function3.java} | 13 +- .../java/function/PairFlatMapFunction.java | 26 + .../spark/api/java/function/PairFunction.java | 25 + .../spark/api/java/function/VoidFunction.java | 24 + .../apache/spark/api/java/JavaDoubleRDD.scala | 2 +- .../apache/spark/api/java/JavaPairRDD.scala | 68 +- .../org/apache/spark/api/java/JavaRDD.scala | 2 +- .../apache/spark/api/java/JavaRDDLike.scala | 82 +- .../java/function/DoubleFlatMapFunction.scala | 30 - .../api/java/function/DoubleFunction.scala | 29 - .../spark/api/java/function/Function.scala | 31 - .../java/function/PairFlatMapFunction.scala | 36 - .../api/java/function/PairFunction.scala | 33 - .../api/java/function/VoidFunction.scala | 33 - .../api/java/function/WrappedFunction1.scala | 32 - .../api/java/function/WrappedFunction2.scala | 32 - .../api/java/function/WrappedFunction3.scala | 34 - .../java/org/apache/spark/JavaAPISuite.java | 34 +- .../org/apache/spark/examples/JavaHdfsLR.java | 6 +- .../org/apache/spark/examples/JavaKMeans.java | 2 +- .../apache/spark/examples/JavaLogQuery.java | 2 +- .../apache/spark/examples/JavaPageRank.java | 6 +- .../org/apache/spark/examples/JavaTC.java | 6 +- .../apache/spark/examples/JavaWordCount.java | 2 +- .../apache/spark/mllib/examples/JavaALS.java | 4 +- .../spark/mllib/examples/JavaKMeans.java | 2 +- .../apache/spark/mllib/examples/JavaLR.java | 2 +- .../examples/JavaKafkaWordCount.java | 2 +- .../examples/JavaNetworkWordCount.java | 2 +- .../streaming/examples/JavaQueueStream.java | 2 +- .../spark/streaming/zeromq/ZeroMQUtils.scala | 6 +- .../java/org/apache/spark/Java8APISuite.java | 377 ++++++++ .../apache/spark/streaming/Java8APISuite.java | 832 ++++++++++++++++++ project/SparkBuild.scala | 17 +- .../streaming/api/java/JavaDStream.scala | 2 +- .../streaming/api/java/JavaDStreamLike.scala | 55 +- .../streaming/api/java/JavaPairDStream.scala | 25 +- .../api/java/JavaStreamingContext.scala | 4 +- .../apache/spark/streaming/JavaAPISuite.java | 59 +- 45 files changed, 1581 insertions(+), 506 deletions(-) rename core/src/main/{scala/org/apache/spark/api/java/function/FlatMapFunction.scala => java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java} (71%) rename core/src/main/{scala/org/apache/spark/api/java/function/FlatMapFunction2.scala => java/org/apache/spark/api/java/function/DoubleFunction.java} (71%) create mode 100644 core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java create mode 100644 core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java rename core/src/main/{scala/org/apache/spark/api/java/function/Function3.scala => java/org/apache/spark/api/java/function/Function.java} (67%) create mode 100644 core/src/main/java/org/apache/spark/api/java/function/Function2.java rename core/src/main/{scala/org/apache/spark/api/java/function/Function2.scala => java/org/apache/spark/api/java/function/Function3.java} (68%) create mode 100644 core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java create mode 100644 core/src/main/java/org/apache/spark/api/java/function/PairFunction.java create mode 100644 core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.scala delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.scala delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/Function.scala delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.scala delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/PairFunction.scala delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala delete mode 100644 core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction3.scala create mode 100644 java8-tests/src/test/java/org/apache/spark/Java8APISuite.java create mode 100644 java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java diff --git a/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala b/core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java similarity index 71% rename from core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala rename to core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java index bdb01f7670356..32f4b50c62a38 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala +++ b/core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java @@ -15,13 +15,10 @@ * limitations under the License. */ -package org.apache.spark.api.java.function +package org.apache.spark.api.java.function; -import scala.reflect.ClassTag +import java.io.Serializable; -/** - * A function that returns zero or more output records from each input record. - */ -abstract class FlatMapFunction[T, R] extends Function[T, java.lang.Iterable[R]] { - def elementType(): ClassTag[R] = ClassTag.Any.asInstanceOf[ClassTag[R]] +public interface DoubleFlatMapFunction extends Serializable { + public Iterable call(T t) throws Exception; } diff --git a/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala b/core/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java similarity index 71% rename from core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala rename to core/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java index aae1349c5e17c..22cce23b0c9ea 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala +++ b/core/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java @@ -15,13 +15,10 @@ * limitations under the License. */ -package org.apache.spark.api.java.function +package org.apache.spark.api.java.function; -import scala.reflect.ClassTag +import java.io.Serializable; -/** - * A function that takes two inputs and returns zero or more output records. - */ -abstract class FlatMapFunction2[A, B, C] extends Function2[A, B, java.lang.Iterable[C]] { - def elementType() : ClassTag[C] = ClassTag.Any.asInstanceOf[ClassTag[C]] +public interface DoubleFunction extends Serializable { + public Double call(T t) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java b/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java new file mode 100644 index 0000000000000..5f51279ca7571 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.java.function; + +import java.io.Serializable; + +public interface FlatMapFunction extends Serializable { + public Iterable call(T t) throws Exception; +} \ No newline at end of file diff --git a/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java b/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java new file mode 100644 index 0000000000000..5bb9361ba85aa --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.java.function; + +import java.io.Serializable; + +public interface FlatMapFunction2 extends Serializable { + public Iterable call(T1 t1, T2 t2) throws Exception; +} \ No newline at end of file diff --git a/core/src/main/scala/org/apache/spark/api/java/function/Function3.scala b/core/src/main/java/org/apache/spark/api/java/function/Function.java similarity index 67% rename from core/src/main/scala/org/apache/spark/api/java/function/Function3.scala rename to core/src/main/java/org/apache/spark/api/java/function/Function.java index 45152891e9272..e4e8e641dcd7a 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/Function3.scala +++ b/core/src/main/java/org/apache/spark/api/java/function/Function.java @@ -15,14 +15,10 @@ * limitations under the License. */ -package org.apache.spark.api.java.function +package org.apache.spark.api.java.function; -import org.apache.spark.api.java.JavaSparkContext -import scala.reflect.ClassTag +import java.io.Serializable; -/** - * A three-argument function that takes arguments of type T1, T2 and T3 and returns an R. - */ -abstract class Function3[T1, T2, T3, R] extends WrappedFunction3[T1, T2, T3, R] with Serializable { - def returnType(): ClassTag[R] = JavaSparkContext.fakeClassTag +public interface Function extends Serializable { + public R call(T1 v1) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/Function2.java b/core/src/main/java/org/apache/spark/api/java/function/Function2.java new file mode 100644 index 0000000000000..d24e9e3a563f7 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/java/function/Function2.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.java.function; + +import java.io.Serializable; + +public interface Function2 extends Serializable { + public R call(T1 v1, T2 v2) throws Exception; +} diff --git a/core/src/main/scala/org/apache/spark/api/java/function/Function2.scala b/core/src/main/java/org/apache/spark/api/java/function/Function3.java similarity index 68% rename from core/src/main/scala/org/apache/spark/api/java/function/Function2.scala rename to core/src/main/java/org/apache/spark/api/java/function/Function3.java index fa3616cbcb4d2..73e388b0e3825 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/Function2.scala +++ b/core/src/main/java/org/apache/spark/api/java/function/Function3.java @@ -15,15 +15,10 @@ * limitations under the License. */ -package org.apache.spark.api.java.function +package org.apache.spark.api.java.function; -import scala.reflect.ClassTag -import org.apache.spark.api.java.JavaSparkContext +import java.io.Serializable; -/** - * A two-argument function that takes arguments of type T1 and T2 and returns an R. - */ -abstract class Function2[T1, T2, R] extends WrappedFunction2[T1, T2, R] with Serializable { - def returnType(): ClassTag[R] = JavaSparkContext.fakeClassTag +public interface Function3 extends Serializable { + public R call(T1 v1, T2 v2, T3 v3) throws Exception; } - diff --git a/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java b/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java new file mode 100644 index 0000000000000..61793709d1608 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.java.function; + +import scala.Tuple2; + +import java.io.Serializable; + +public interface PairFlatMapFunction extends Serializable { + public Iterable> call(T t) throws Exception; +} diff --git a/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java b/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java new file mode 100644 index 0000000000000..707cd4067ed58 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.java.function; + +import java.io.Serializable; +import scala.Tuple2; + +public interface PairFunction extends Serializable { + public Tuple2 call(T t) throws Exception; +} diff --git a/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java b/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java new file mode 100644 index 0000000000000..dc5ddd5e90410 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.java.function; + +import java.io.Serializable; + +public interface VoidFunction extends Serializable { + public void call(T t) throws Exception; +} diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala index 071044463d980..d1787061bc642 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala @@ -83,7 +83,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[JDouble, Ja * Return a new RDD containing only the elements that satisfy a predicate. */ def filter(f: JFunction[JDouble, java.lang.Boolean]): JavaDoubleRDD = - fromRDD(srdd.filter(x => f(x).booleanValue())) + fromRDD(srdd.filter(x => f.call(x).booleanValue())) /** * Return a new RDD that is reduced into `numPartitions` partitions. diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 3f672900cb90f..d791f8f5f7485 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -32,7 +32,7 @@ import org.apache.spark.{HashPartitioner, Partitioner} import org.apache.spark.Partitioner._ import org.apache.spark.SparkContext.rddToPairRDDFunctions import org.apache.spark.api.java.JavaSparkContext.fakeClassTag -import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2} +import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2, PairFunction} import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.rdd.{OrderedRDDFunctions, RDD} import org.apache.spark.storage.StorageLevel @@ -89,7 +89,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Return a new RDD containing only the elements that satisfy a predicate. */ def filter(f: JFunction[(K, V), java.lang.Boolean]): JavaPairRDD[K, V] = - new JavaPairRDD[K, V](rdd.filter(x => f(x).booleanValue())) + new JavaPairRDD[K, V](rdd.filter(x => f.call(x).booleanValue())) /** * Return a new RDD that is reduced into `numPartitions` partitions. @@ -165,9 +165,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Simplified version of combineByKey that hash-partitions the output RDD. */ def combineByKey[C](createCombiner: JFunction[V, C], - mergeValue: JFunction2[C, V, C], - mergeCombiners: JFunction2[C, C, C], - numPartitions: Int): JavaPairRDD[K, C] = + mergeValue: JFunction2[C, V, C], + mergeCombiners: JFunction2[C, C, C], + numPartitions: Int): JavaPairRDD[K, C] = combineByKey(createCombiner, mergeValue, mergeCombiners, new HashPartitioner(numPartitions)) /** @@ -442,7 +442,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) */ def flatMapValues[U](f: JFunction[V, java.lang.Iterable[U]]): JavaPairRDD[K, U] = { import scala.collection.JavaConverters._ - def fn = (x: V) => f.apply(x).asScala + def fn = (x: V) => f.call(x).asScala implicit val ctag: ClassTag[U] = fakeClassTag fromRDD(rdd.flatMapValues(fn)) } @@ -511,49 +511,49 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) /** Output the RDD to any Hadoop-supported file system. */ def saveAsHadoopFile[F <: OutputFormat[_, _]]( - path: String, - keyClass: Class[_], - valueClass: Class[_], - outputFormatClass: Class[F], - conf: JobConf) { + path: String, + keyClass: Class[_], + valueClass: Class[_], + outputFormatClass: Class[F], + conf: JobConf) { rdd.saveAsHadoopFile(path, keyClass, valueClass, outputFormatClass, conf) } /** Output the RDD to any Hadoop-supported file system. */ def saveAsHadoopFile[F <: OutputFormat[_, _]]( - path: String, - keyClass: Class[_], - valueClass: Class[_], - outputFormatClass: Class[F]) { + path: String, + keyClass: Class[_], + valueClass: Class[_], + outputFormatClass: Class[F]) { rdd.saveAsHadoopFile(path, keyClass, valueClass, outputFormatClass) } /** Output the RDD to any Hadoop-supported file system, compressing with the supplied codec. */ def saveAsHadoopFile[F <: OutputFormat[_, _]]( - path: String, - keyClass: Class[_], - valueClass: Class[_], - outputFormatClass: Class[F], - codec: Class[_ <: CompressionCodec]) { + path: String, + keyClass: Class[_], + valueClass: Class[_], + outputFormatClass: Class[F], + codec: Class[_ <: CompressionCodec]) { rdd.saveAsHadoopFile(path, keyClass, valueClass, outputFormatClass, codec) } /** Output the RDD to any Hadoop-supported file system. */ def saveAsNewAPIHadoopFile[F <: NewOutputFormat[_, _]]( - path: String, - keyClass: Class[_], - valueClass: Class[_], - outputFormatClass: Class[F], - conf: Configuration) { + path: String, + keyClass: Class[_], + valueClass: Class[_], + outputFormatClass: Class[F], + conf: Configuration) { rdd.saveAsNewAPIHadoopFile(path, keyClass, valueClass, outputFormatClass, conf) } /** Output the RDD to any Hadoop-supported file system. */ def saveAsNewAPIHadoopFile[F <: NewOutputFormat[_, _]]( - path: String, - keyClass: Class[_], - valueClass: Class[_], - outputFormatClass: Class[F]) { + path: String, + keyClass: Class[_], + valueClass: Class[_], + outputFormatClass: Class[F]) { rdd.saveAsNewAPIHadoopFile(path, keyClass, valueClass, outputFormatClass) } @@ -699,8 +699,14 @@ object JavaPairRDD { } implicit def toRDD[K, V](rdd: JavaPairRDD[K, V]): RDD[(K, V)] = rdd.rdd - - + private[spark] + implicit def toJFunction2[T1, T2, R](fun: JFunction2[T1, T2, R]) + : Function2[T1, T2, R] = (x: T1, x1: T2) => fun.call(x, x1) + private[spark] + implicit def toJFunction[T, R](fun: JFunction[T, R]): T => R = (x) => fun.call(x) + private[spark] + implicit def pairFunToScalaFun[A, B, C](x: PairFunction[A, B, C]): A => (B, C) = + (y: A) => x.call(y) /** Convert a JavaRDD of key-value pairs to JavaPairRDD. */ def fromJavaRDD[K, V](rdd: JavaRDD[(K, V)]): JavaPairRDD[K, V] = { implicit val ctagK: ClassTag[K] = fakeClassTag diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index d7ce8fdfc23f4..e973c46edd1ce 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -70,7 +70,7 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) * Return a new RDD containing only the elements that satisfy a predicate. */ def filter(f: JFunction[T, java.lang.Boolean]): JavaRDD[T] = - wrapRDD(rdd.filter((x => f(x).booleanValue()))) + wrapRDD(rdd.filter((x => f.call(x).booleanValue()))) /** * Return a new RDD that is reduced into `numPartitions` partitions. diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 729668fb679b4..e8727774739d5 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -67,7 +67,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return a new RDD by applying a function to all elements of this RDD. */ def map[R](f: JFunction[T, R]): JavaRDD[R] = - new JavaRDD(rdd.map(f)(f.returnType()))(f.returnType()) + new JavaRDD(rdd.map(f)(fakeClassTag))(fakeClassTag) /** * Return a new RDD by applying a function to each partition of this RDD, while tracking the index @@ -82,15 +82,16 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Return a new RDD by applying a function to all elements of this RDD. */ - def map[R](f: DoubleFunction[T]): JavaDoubleRDD = - new JavaDoubleRDD(rdd.map(x => f(x).doubleValue())) + def mapToDouble[R](f: DoubleFunction[T]): JavaDoubleRDD = { + new JavaDoubleRDD(rdd.map(x => f.call(x).doubleValue())) + } /** * Return a new RDD by applying a function to all elements of this RDD. */ - def map[K2, V2](f: PairFunction[T, K2, V2]): JavaPairRDD[K2, V2] = { - val ctag = implicitly[ClassTag[Tuple2[K2, V2]]] - new JavaPairRDD(rdd.map(f)(ctag))(f.keyType(), f.valueType()) + def mapToPair[K2, V2](f: PairFunction[T, K2, V2]): JavaPairRDD[K2, V2] = { + def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] + new JavaPairRDD(rdd.map[(K2, V2)](f)(cm))(fakeClassTag[K2], fakeClassTag[V2]) } /** @@ -99,17 +100,17 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { */ def flatMap[U](f: FlatMapFunction[T, U]): JavaRDD[U] = { import scala.collection.JavaConverters._ - def fn = (x: T) => f.apply(x).asScala - JavaRDD.fromRDD(rdd.flatMap(fn)(f.elementType()))(f.elementType()) + def fn = (x: T) => f.call(x).asScala + JavaRDD.fromRDD(rdd.flatMap(fn)(fakeClassTag[U]))(fakeClassTag[U]) } /** * Return a new RDD by first applying a function to all elements of this * RDD, and then flattening the results. */ - def flatMap(f: DoubleFlatMapFunction[T]): JavaDoubleRDD = { + def flatMapToDouble(f: DoubleFlatMapFunction[T]): JavaDoubleRDD = { import scala.collection.JavaConverters._ - def fn = (x: T) => f.apply(x).asScala + def fn = (x: T) => f.call(x).asScala new JavaDoubleRDD(rdd.flatMap(fn).map((x: java.lang.Double) => x.doubleValue())) } @@ -117,19 +118,19 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return a new RDD by first applying a function to all elements of this * RDD, and then flattening the results. */ - def flatMap[K2, V2](f: PairFlatMapFunction[T, K2, V2]): JavaPairRDD[K2, V2] = { + def flatMapToPair[K2, V2](f: PairFlatMapFunction[T, K2, V2]): JavaPairRDD[K2, V2] = { import scala.collection.JavaConverters._ - def fn = (x: T) => f.apply(x).asScala - val ctag = implicitly[ClassTag[Tuple2[K2, V2]]] - JavaPairRDD.fromRDD(rdd.flatMap(fn)(ctag))(f.keyType(), f.valueType()) + def fn = (x: T) => f.call(x).asScala + def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] + JavaPairRDD.fromRDD(rdd.flatMap(fn)(cm))(fakeClassTag[K2], fakeClassTag[V2]) } /** * Return a new RDD by applying a function to each partition of this RDD. */ def mapPartitions[U](f: FlatMapFunction[java.util.Iterator[T], U]): JavaRDD[U] = { - def fn = (x: Iterator[T]) => asScalaIterator(f.apply(asJavaIterator(x)).iterator()) - JavaRDD.fromRDD(rdd.mapPartitions(fn)(f.elementType()))(f.elementType()) + def fn = (x: Iterator[T]) => asScalaIterator(f.call(asJavaIterator(x)).iterator()) + JavaRDD.fromRDD(rdd.mapPartitions(fn)(fakeClassTag[U]))(fakeClassTag[U]) } /** @@ -137,52 +138,53 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { */ def mapPartitions[U](f: FlatMapFunction[java.util.Iterator[T], U], preservesPartitioning: Boolean): JavaRDD[U] = { - def fn = (x: Iterator[T]) => asScalaIterator(f.apply(asJavaIterator(x)).iterator()) - JavaRDD.fromRDD(rdd.mapPartitions(fn, preservesPartitioning)(f.elementType()))(f.elementType()) + def fn = (x: Iterator[T]) => asScalaIterator(f.call(asJavaIterator(x)).iterator()) + JavaRDD.fromRDD( + rdd.mapPartitions(fn, preservesPartitioning)(fakeClassTag[U]))(fakeClassTag[U]) } /** - * Return a new RDD by applying a function to each partition of this RDD. + * Return a new RDD by applying a function to each partition of this RDD. */ - def mapPartitions(f: DoubleFlatMapFunction[java.util.Iterator[T]]): JavaDoubleRDD = { - def fn = (x: Iterator[T]) => asScalaIterator(f.apply(asJavaIterator(x)).iterator()) + def mapPartitionsToDouble(f: DoubleFlatMapFunction[java.util.Iterator[T]]): JavaDoubleRDD = { + def fn = (x: Iterator[T]) => asScalaIterator(f.call(asJavaIterator(x)).iterator()) new JavaDoubleRDD(rdd.mapPartitions(fn).map((x: java.lang.Double) => x.doubleValue())) } /** - * Return a new RDD by applying a function to each partition of this RDD. + * Return a new RDD by applying a function to each partition of this RDD. */ - def mapPartitions[K2, V2](f: PairFlatMapFunction[java.util.Iterator[T], K2, V2]): + def mapPartitionsToPair[K2, V2](f: PairFlatMapFunction[java.util.Iterator[T], K2, V2]): JavaPairRDD[K2, V2] = { - def fn = (x: Iterator[T]) => asScalaIterator(f.apply(asJavaIterator(x)).iterator()) - JavaPairRDD.fromRDD(rdd.mapPartitions(fn))(f.keyType(), f.valueType()) + def fn = (x: Iterator[T]) => asScalaIterator(f.call(asJavaIterator(x)).iterator()) + JavaPairRDD.fromRDD(rdd.mapPartitions(fn))(fakeClassTag[K2], fakeClassTag[V2]) } - /** * Return a new RDD by applying a function to each partition of this RDD. */ - def mapPartitions(f: DoubleFlatMapFunction[java.util.Iterator[T]], - preservesPartitioning: Boolean): JavaDoubleRDD = { - def fn = (x: Iterator[T]) => asScalaIterator(f.apply(asJavaIterator(x)).iterator()) + def mapPartitionsToDouble(f: DoubleFlatMapFunction[java.util.Iterator[T]], + preservesPartitioning: Boolean): JavaDoubleRDD = { + def fn = (x: Iterator[T]) => asScalaIterator(f.call(asJavaIterator(x)).iterator()) new JavaDoubleRDD(rdd.mapPartitions(fn, preservesPartitioning) - .map((x: java.lang.Double) => x.doubleValue())) + .map(x => x.doubleValue())) } /** * Return a new RDD by applying a function to each partition of this RDD. */ - def mapPartitions[K2, V2](f: PairFlatMapFunction[java.util.Iterator[T], K2, V2], + def mapPartitionsToPair[K2, V2](f: PairFlatMapFunction[java.util.Iterator[T], K2, V2], preservesPartitioning: Boolean): JavaPairRDD[K2, V2] = { - def fn = (x: Iterator[T]) => asScalaIterator(f.apply(asJavaIterator(x)).iterator()) - JavaPairRDD.fromRDD(rdd.mapPartitions(fn, preservesPartitioning))(f.keyType(), f.valueType()) + def fn = (x: Iterator[T]) => asScalaIterator(f.call(asJavaIterator(x)).iterator()) + JavaPairRDD.fromRDD( + rdd.mapPartitions(fn, preservesPartitioning))(fakeClassTag[K2], fakeClassTag[V2]) } /** * Applies a function f to each partition of this RDD. */ def foreachPartition(f: VoidFunction[java.util.Iterator[T]]) { - rdd.foreachPartition((x => f(asJavaIterator(x)))) + rdd.foreachPartition((x => f.call(asJavaIterator(x)))) } /** @@ -205,7 +207,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def groupBy[K](f: JFunction[T, K]): JavaPairRDD[K, JList[T]] = { implicit val ctagK: ClassTag[K] = fakeClassTag implicit val ctagV: ClassTag[JList[T]] = fakeClassTag - JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f)(f.returnType))) + JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f)(fakeClassTag))) } /** @@ -215,7 +217,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def groupBy[K](f: JFunction[T, K], numPartitions: Int): JavaPairRDD[K, JList[T]] = { implicit val ctagK: ClassTag[K] = fakeClassTag implicit val ctagV: ClassTag[JList[T]] = fakeClassTag - JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f, numPartitions)(f.returnType))) + JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f, numPartitions)(fakeClassTag[K]))) } /** @@ -255,9 +257,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { other: JavaRDDLike[U, _], f: FlatMapFunction2[java.util.Iterator[T], java.util.Iterator[U], V]): JavaRDD[V] = { def fn = (x: Iterator[T], y: Iterator[U]) => asScalaIterator( - f.apply(asJavaIterator(x), asJavaIterator(y)).iterator()) + f.call(asJavaIterator(x), asJavaIterator(y)).iterator()) JavaRDD.fromRDD( - rdd.zipPartitions(other.rdd)(fn)(other.classTag, f.elementType()))(f.elementType()) + rdd.zipPartitions(other.rdd)(fn)(other.classTag, fakeClassTag[V]))(fakeClassTag[V]) } // Actions (launch a job to return a value to the user program) @@ -266,7 +268,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Applies a function f to all elements of this RDD. */ def foreach(f: VoidFunction[T]) { - val cleanF = rdd.context.clean(f) + val cleanF = rdd.context.clean((x: T) => f.call(x)) rdd.foreach(cleanF) } @@ -320,7 +322,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { */ def aggregate[U](zeroValue: U)(seqOp: JFunction2[U, T, U], combOp: JFunction2[U, U, U]): U = - rdd.aggregate(zeroValue)(seqOp, combOp)(seqOp.returnType) + rdd.aggregate(zeroValue)(seqOp, combOp)(fakeClassTag[U]) /** * Return the number of elements in the RDD. diff --git a/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.scala deleted file mode 100644 index 7500a8943634b..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -import java.lang.{Double => JDouble, Iterable => JIterable} - -/** - * A function that returns zero or more records of type Double from each input record. - */ -// DoubleFlatMapFunction does not extend FlatMapFunction because flatMap is -// overloaded for both FlatMapFunction and DoubleFlatMapFunction. -abstract class DoubleFlatMapFunction[T] extends WrappedFunction1[T, JIterable[JDouble]] - with Serializable { - // Intentionally left blank -} diff --git a/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.scala deleted file mode 100644 index 2cdf2e92c3daa..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.scala +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -import java.lang.{Double => JDouble} - -/** - * A function that returns Doubles, and can be used to construct DoubleRDDs. - */ -// DoubleFunction does not extend Function because some UDF functions, like map, -// are overloaded for both Function and DoubleFunction. -abstract class DoubleFunction[T] extends WrappedFunction1[T, JDouble] with Serializable { - // Intentionally left blank -} diff --git a/core/src/main/scala/org/apache/spark/api/java/function/Function.scala b/core/src/main/scala/org/apache/spark/api/java/function/Function.scala deleted file mode 100644 index a5e1701f7718f..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/Function.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -import scala.reflect.ClassTag -import org.apache.spark.api.java.JavaSparkContext - -/** - * Base class for functions whose return types do not create special RDDs. PairFunction and - * DoubleFunction are handled separately, to allow PairRDDs and DoubleRDDs to be constructed - * when mapping RDDs of other types. - */ -abstract class Function[T, R] extends WrappedFunction1[T, R] with Serializable { - def returnType(): ClassTag[R] = JavaSparkContext.fakeClassTag -} - diff --git a/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.scala deleted file mode 100644 index 8467bbb892ab0..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -import java.lang.{Iterable => JIterable} -import org.apache.spark.api.java.JavaSparkContext -import scala.reflect.ClassTag - -/** - * A function that returns zero or more key-value pair records from each input record. The - * key-value pairs are represented as scala.Tuple2 objects. - */ -// PairFlatMapFunction does not extend FlatMapFunction because flatMap is -// overloaded for both FlatMapFunction and PairFlatMapFunction. -abstract class PairFlatMapFunction[T, K, V] extends WrappedFunction1[T, JIterable[(K, V)]] - with Serializable { - - def keyType(): ClassTag[K] = JavaSparkContext.fakeClassTag - - def valueType(): ClassTag[V] = JavaSparkContext.fakeClassTag -} diff --git a/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.scala deleted file mode 100644 index d0ba0b6307ee9..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -import scala.reflect.ClassTag -import org.apache.spark.api.java.JavaSparkContext - -/** - * A function that returns key-value pairs (Tuple2), and can be used to construct PairRDDs. - */ -// PairFunction does not extend Function because some UDF functions, like map, -// are overloaded for both Function and PairFunction. -abstract class PairFunction[T, K, V] extends WrappedFunction1[T, (K, V)] with Serializable { - - def keyType(): ClassTag[K] = JavaSparkContext.fakeClassTag - - def valueType(): ClassTag[V] = JavaSparkContext.fakeClassTag -} diff --git a/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala deleted file mode 100644 index ea94313a4ab59..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -/** - * A function with no return value. - */ -// This allows Java users to write void methods without having to return Unit. -abstract class VoidFunction[T] extends Serializable { - @throws(classOf[Exception]) - def call(t: T) : Unit -} - -// VoidFunction cannot extend AbstractFunction1 (because that would force users to explicitly -// return Unit), so it is implicitly converted to a Function1[T, Unit]: -object VoidFunction { - implicit def toFunction[T](f: VoidFunction[T]) : Function1[T, Unit] = ((x : T) => f.call(x)) -} diff --git a/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala deleted file mode 100644 index cfe694f65d558..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -import scala.runtime.AbstractFunction1 - -/** - * Subclass of Function1 for ease of calling from Java. The main thing it does is re-expose the - * apply() method as call() and declare that it can throw Exception (since AbstractFunction1.apply - * isn't marked to allow that). - */ -private[spark] abstract class WrappedFunction1[T, R] extends AbstractFunction1[T, R] { - @throws(classOf[Exception]) - def call(t: T): R - - final def apply(t: T): R = call(t) -} diff --git a/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala deleted file mode 100644 index eb9277c6fb4cb..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -import scala.runtime.AbstractFunction2 - -/** - * Subclass of Function2 for ease of calling from Java. The main thing it does is re-expose the - * apply() method as call() and declare that it can throw Exception (since AbstractFunction2.apply - * isn't marked to allow that). - */ -private[spark] abstract class WrappedFunction2[T1, T2, R] extends AbstractFunction2[T1, T2, R] { - @throws(classOf[Exception]) - def call(t1: T1, t2: T2): R - - final def apply(t1: T1, t2: T2): R = call(t1, t2) -} diff --git a/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction3.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction3.scala deleted file mode 100644 index d314dbdf1d980..0000000000000 --- a/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction3.scala +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.api.java.function - -import scala.runtime.AbstractFunction3 - -/** - * Subclass of Function3 for ease of calling from Java. The main thing it does is re-expose the - * apply() method as call() and declare that it can throw Exception (since AbstractFunction3.apply - * isn't marked to allow that). - */ -private[spark] abstract class WrappedFunction3[T1, T2, T3, R] - extends AbstractFunction3[T1, T2, T3, R] { - @throws(classOf[Exception]) - def call(t1: T1, t2: T2, t3: T3): R - - final def apply(t1: T1, t2: T2, t3: T3): R = call(t1, t2, t3) -} - diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index aa5079c159830..881cd055d9712 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -386,14 +386,14 @@ public void javaDoubleRDDHistoGram() { @Test public void map() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); - JavaDoubleRDD doubles = rdd.map(new DoubleFunction() { + JavaDoubleRDD doubles = rdd.mapToDouble(new DoubleFunction() { @Override public Double call(Integer x) { return 1.0 * x; } }).cache(); doubles.collect(); - JavaPairRDD pairs = rdd.map(new PairFunction() { + JavaPairRDD pairs = rdd.mapToPair(new PairFunction() { @Override public Tuple2 call(Integer x) { return new Tuple2(x, x); @@ -422,7 +422,7 @@ public Iterable call(String x) { Assert.assertEquals("Hello", words.first()); Assert.assertEquals(11, words.count()); - JavaPairRDD pairs = rdd.flatMap( + JavaPairRDD pairs = rdd.flatMapToPair( new PairFlatMapFunction() { @Override @@ -436,7 +436,7 @@ public Iterable> call(String s) { Assert.assertEquals(new Tuple2("Hello", "Hello"), pairs.first()); Assert.assertEquals(11, pairs.count()); - JavaDoubleRDD doubles = rdd.flatMap(new DoubleFlatMapFunction() { + JavaDoubleRDD doubles = rdd.flatMapToDouble(new DoubleFlatMapFunction() { @Override public Iterable call(String s) { List lengths = new LinkedList(); @@ -459,7 +459,7 @@ public void mapsFromPairsToPairs() { JavaPairRDD pairRDD = sc.parallelizePairs(pairs); // Regression test for SPARK-668: - JavaPairRDD swapped = pairRDD.flatMap( + JavaPairRDD swapped = pairRDD.flatMapToPair( new PairFlatMapFunction, String, Integer>() { @Override public Iterable> call(Tuple2 item) throws Exception { @@ -469,7 +469,7 @@ public Iterable> call(Tuple2 item) thro swapped.collect(); // There was never a bug here, but it's worth testing: - pairRDD.map(new PairFunction, String, Integer>() { + pairRDD.mapToPair(new PairFunction, String, Integer>() { @Override public Tuple2 call(Tuple2 item) throws Exception { return item.swap(); @@ -592,7 +592,7 @@ public void sequenceFile() { ); JavaPairRDD rdd = sc.parallelizePairs(pairs); - rdd.map(new PairFunction, IntWritable, Text>() { + rdd.mapToPair(new PairFunction, IntWritable, Text>() { @Override public Tuple2 call(Tuple2 pair) { return new Tuple2(new IntWritable(pair._1()), new Text(pair._2())); @@ -601,7 +601,7 @@ public Tuple2 call(Tuple2 pair) { // Try reading the output back as an object file JavaPairRDD readRDD = sc.sequenceFile(outputDir, IntWritable.class, - Text.class).map(new PairFunction, Integer, String>() { + Text.class).mapToPair(new PairFunction, Integer, String>() { @Override public Tuple2 call(Tuple2 pair) { return new Tuple2(pair._1().get(), pair._2().toString()); @@ -622,7 +622,7 @@ public void writeWithNewAPIHadoopFile() { ); JavaPairRDD rdd = sc.parallelizePairs(pairs); - rdd.map(new PairFunction, IntWritable, Text>() { + rdd.mapToPair(new PairFunction, IntWritable, Text>() { @Override public Tuple2 call(Tuple2 pair) { return new Tuple2(new IntWritable(pair._1()), new Text(pair._2())); @@ -653,7 +653,7 @@ public void readWithNewAPIHadoopFile() throws IOException { ); JavaPairRDD rdd = sc.parallelizePairs(pairs); - rdd.map(new PairFunction, IntWritable, Text>() { + rdd.mapToPair(new PairFunction, IntWritable, Text>() { @Override public Tuple2 call(Tuple2 pair) { return new Tuple2(new IntWritable(pair._1()), new Text(pair._2())); @@ -713,7 +713,7 @@ public void hadoopFile() { ); JavaPairRDD rdd = sc.parallelizePairs(pairs); - rdd.map(new PairFunction, IntWritable, Text>() { + rdd.mapToPair(new PairFunction, IntWritable, Text>() { @Override public Tuple2 call(Tuple2 pair) { return new Tuple2(new IntWritable(pair._1()), new Text(pair._2())); @@ -743,7 +743,7 @@ public void hadoopFileCompressed() { ); JavaPairRDD rdd = sc.parallelizePairs(pairs); - rdd.map(new PairFunction, IntWritable, Text>() { + rdd.mapToPair(new PairFunction, IntWritable, Text>() { @Override public Tuple2 call(Tuple2 pair) { return new Tuple2(new IntWritable(pair._1()), new Text(pair._2())); @@ -766,7 +766,7 @@ public String call(Tuple2 x) { @Test public void zip() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); - JavaDoubleRDD doubles = rdd.map(new DoubleFunction() { + JavaDoubleRDD doubles = rdd.mapToDouble(new DoubleFunction() { @Override public Double call(Integer x) { return 1.0 * x; @@ -893,13 +893,13 @@ public void checkpointAndRestore() { @Test public void mapOnPairRDD() { JavaRDD rdd1 = sc.parallelize(Arrays.asList(1,2,3,4)); - JavaPairRDD rdd2 = rdd1.map(new PairFunction() { + JavaPairRDD rdd2 = rdd1.mapToPair(new PairFunction() { @Override public Tuple2 call(Integer i) throws Exception { return new Tuple2(i, i % 2); } }); - JavaPairRDD rdd3 = rdd2.map( + JavaPairRDD rdd3 = rdd2.mapToPair( new PairFunction, Integer, Integer>() { @Override public Tuple2 call(Tuple2 in) throws Exception { @@ -919,7 +919,7 @@ public Tuple2 call(Tuple2 in) throws Excepti public void collectPartitions() { JavaRDD rdd1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7), 3); - JavaPairRDD rdd2 = rdd1.map(new PairFunction() { + JavaPairRDD rdd2 = rdd1.mapToPair(new PairFunction() { @Override public Tuple2 call(Integer i) throws Exception { return new Tuple2(i, i % 2); @@ -984,7 +984,7 @@ public void countApproxDistinctByKey() { public void collectAsMapWithIntArrayValues() { // Regression test for SPARK-1040 JavaRDD rdd = sc.parallelize(Arrays.asList(1)); - JavaPairRDD pairRDD = rdd.map(new PairFunction() { + JavaPairRDD pairRDD = rdd.mapToPair(new PairFunction() { @Override public Tuple2 call(Integer x) throws Exception { return new Tuple2(x, new int[] { x }); diff --git a/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java index d552c47b22231..6b49244ba459d 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java @@ -45,7 +45,7 @@ static class DataPoint implements Serializable { double y; } - static class ParsePoint extends Function { + static class ParsePoint implements Function { private static final Pattern SPACE = Pattern.compile(" "); @Override @@ -60,7 +60,7 @@ public DataPoint call(String line) { } } - static class VectorSum extends Function2 { + static class VectorSum implements Function2 { @Override public double[] call(double[] a, double[] b) { double[] result = new double[D]; @@ -71,7 +71,7 @@ public double[] call(double[] a, double[] b) { } } - static class ComputeGradient extends Function { + static class ComputeGradient implements Function { private final double[] weights; ComputeGradient(double[] weights) { diff --git a/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java index 0dc879275a22a..2d797279d5bcc 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaKMeans.java @@ -98,7 +98,7 @@ public Vector call(String line) { double tempDist; do { // allocate each vector to closest centroid - JavaPairRDD closest = data.map( + JavaPairRDD closest = data.mapToPair( new PairFunction() { @Override public Tuple2 call(Vector vector) { diff --git a/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java index 9eb1cadd71d22..a518fe2f27eb0 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java @@ -108,7 +108,7 @@ public static void main(String[] args) { JavaRDD dataSet = (args.length == 2) ? jsc.textFile(args[1]) : jsc.parallelize(exampleApacheLogs); - JavaPairRDD, Stats> extracted = dataSet.map(new PairFunction, Stats>() { + JavaPairRDD, Stats> extracted = dataSet.mapToPair(new PairFunction, Stats>() { @Override public Tuple2, Stats> call(String s) { return new Tuple2, Stats>(extractKey(s), extractStats(s)); diff --git a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java index a84245b0c7449..e53925b50c2ce 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java @@ -42,7 +42,7 @@ public final class JavaPageRank { private static final Pattern SPACES = Pattern.compile("\\s+"); - private static class Sum extends Function2 { + private static class Sum implements Function2 { @Override public Double call(Double a, Double b) { return a + b; @@ -66,7 +66,7 @@ public static void main(String[] args) throws Exception { JavaRDD lines = ctx.textFile(args[1], 1); // Loads all URLs from input file and initialize their neighbors. - JavaPairRDD> links = lines.map(new PairFunction() { + JavaPairRDD> links = lines.mapToPair(new PairFunction() { @Override public Tuple2 call(String s) { String[] parts = SPACES.split(s); @@ -86,7 +86,7 @@ public Double call(List rs) { for (int current = 0; current < Integer.parseInt(args[2]); current++) { // Calculates URL contributions to the rank of other URLs. JavaPairRDD contribs = links.join(ranks).values() - .flatMap(new PairFlatMapFunction, Double>, String, Double>() { + .flatMapToPair(new PairFlatMapFunction, Double>, String, Double>() { @Override public Iterable> call(Tuple2, Double> s) { List> results = new ArrayList>(); diff --git a/examples/src/main/java/org/apache/spark/examples/JavaTC.java b/examples/src/main/java/org/apache/spark/examples/JavaTC.java index 2ceb0fd94ba65..6cfe25c80ecc6 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaTC.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaTC.java @@ -50,7 +50,7 @@ static List> generateGraph() { return new ArrayList>(edges); } - static class ProjectFn extends PairFunction>, + static class ProjectFn implements PairFunction>, Integer, Integer> { static final ProjectFn INSTANCE = new ProjectFn(); @@ -77,7 +77,7 @@ public static void main(String[] args) { // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. - JavaPairRDD edges = tc.map( + JavaPairRDD edges = tc.mapToPair( new PairFunction, Integer, Integer>() { @Override public Tuple2 call(Tuple2 e) { @@ -91,7 +91,7 @@ public Tuple2 call(Tuple2 e) { oldCount = nextCount; // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. - tc = tc.union(tc.join(edges).map(ProjectFn.INSTANCE)).distinct().cache(); + tc = tc.union(tc.join(edges).mapToPair(ProjectFn.INSTANCE)).distinct().cache(); nextCount = tc.count(); } while (nextCount != oldCount); diff --git a/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java index 6651f98d56711..fa1b977ab19f1 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java @@ -49,7 +49,7 @@ public Iterable call(String s) { } }); - JavaPairRDD ones = words.map(new PairFunction() { + JavaPairRDD ones = words.mapToPair(new PairFunction() { @Override public Tuple2 call(String s) { return new Tuple2(s, 1); diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java index 435a86e62abc5..64a3a04fb7296 100644 --- a/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java +++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaALS.java @@ -35,7 +35,7 @@ */ public final class JavaALS { - static class ParseRating extends Function { + static class ParseRating implements Function { private static final Pattern COMMA = Pattern.compile(","); @Override @@ -48,7 +48,7 @@ public Rating call(String line) { } } - static class FeaturesToString extends Function, String> { + static class FeaturesToString implements Function, String> { @Override public String call(Tuple2 element) { return element._1() + "," + Arrays.toString(element._2()); diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java index 4b2658f257b3c..76ebdccfd6b67 100644 --- a/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java +++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java @@ -32,7 +32,7 @@ */ public final class JavaKMeans { - static class ParsePoint extends Function { + static class ParsePoint implements Function { private static final Pattern SPACE = Pattern.compile(" "); @Override diff --git a/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java index 21586ce817d09..667c72f379e71 100644 --- a/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java +++ b/examples/src/main/java/org/apache/spark/mllib/examples/JavaLR.java @@ -34,7 +34,7 @@ */ public final class JavaLR { - static class ParsePoint extends Function { + static class ParsePoint implements Function { private static final Pattern COMMA = Pattern.compile(","); private static final Pattern SPACE = Pattern.compile(" "); diff --git a/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java index 2ffd351b4e498..d704be08d6945 100644 --- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java +++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaKafkaWordCount.java @@ -89,7 +89,7 @@ public Iterable call(String x) { } }); - JavaPairDStream wordCounts = words.map( + JavaPairDStream wordCounts = words.mapToPair( new PairFunction() { @Override public Tuple2 call(String s) { diff --git a/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java index 7777c9832abd3..7f68d451e9b31 100644 --- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaNetworkWordCount.java @@ -69,7 +69,7 @@ public Iterable call(String x) { return Lists.newArrayList(SPACE.split(x)); } }); - JavaPairDStream wordCounts = words.map( + JavaPairDStream wordCounts = words.mapToPair( new PairFunction() { @Override public Tuple2 call(String s) { diff --git a/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java index 26c44620abec1..88ad341641e0a 100644 --- a/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java +++ b/examples/src/main/java/org/apache/spark/streaming/examples/JavaQueueStream.java @@ -63,7 +63,7 @@ public static void main(String[] args) throws Exception { // Create the QueueInputDStream and use it do some processing JavaDStream inputStream = ssc.queueStream(rddQueue); - JavaPairDStream mappedStream = inputStream.map( + JavaPairDStream mappedStream = inputStream.mapToPair( new PairFunction() { @Override public Tuple2 call(Integer i) { diff --git a/external/zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala b/external/zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala index c989ec0f27465..b254e00714621 100644 --- a/external/zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala +++ b/external/zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala @@ -75,7 +75,7 @@ object ZeroMQUtils { ): JavaDStream[T] = { implicit val cm: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]] - val fn = (x: Seq[ByteString]) => bytesToObjects.apply(x.map(_.toArray).toArray).toIterator + val fn = (x: Seq[ByteString]) => bytesToObjects.call(x.map(_.toArray).toArray).toIterator createStream[T](jssc.ssc, publisherUrl, subscribe, fn, storageLevel, supervisorStrategy) } @@ -99,7 +99,7 @@ object ZeroMQUtils { ): JavaDStream[T] = { implicit val cm: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]] - val fn = (x: Seq[ByteString]) => bytesToObjects.apply(x.map(_.toArray).toArray).toIterator + val fn = (x: Seq[ByteString]) => bytesToObjects.call(x.map(_.toArray).toArray).toIterator createStream[T](jssc.ssc, publisherUrl, subscribe, fn, storageLevel) } @@ -122,7 +122,7 @@ object ZeroMQUtils { ): JavaDStream[T] = { implicit val cm: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]] - val fn = (x: Seq[ByteString]) => bytesToObjects.apply(x.map(_.toArray).toArray).toIterator + val fn = (x: Seq[ByteString]) => bytesToObjects.call(x.map(_.toArray).toArray).toIterator createStream[T](jssc.ssc, publisherUrl, subscribe, fn) } } diff --git a/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java b/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java new file mode 100644 index 0000000000000..fc47b69dd10fb --- /dev/null +++ b/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java @@ -0,0 +1,377 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark; + +import com.google.common.base.Optional; +import com.google.common.io.Files; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.api.java.JavaDoubleRDD; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.*; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import scala.Tuple2; + +import java.io.File; +import java.io.Serializable; +import java.util.*; + + +/** + * Most of these tests replicate org.apache.spark.JavaAPISuite using java 8 + * lambda syntax. + */ +public class Java8APISuite implements Serializable { + static int foreachCalls = 0; + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaAPISuite"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.driver.port"); + } + + @Test + public void foreach() { + foreachCalls = 0; + JavaRDD rdd = sc.parallelize(Arrays.asList("Hello", "World")); + rdd.foreach((x) -> foreachCalls++); + Assert.assertEquals(2, foreachCalls); + } + + @Test + public void groupBy() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); + Function isOdd = x -> x % 2 == 0; + JavaPairRDD> oddsAndEvens = rdd.groupBy(isOdd); + Assert.assertEquals(2, oddsAndEvens.count()); + Assert.assertEquals(2, oddsAndEvens.lookup(true).get(0).size()); // Evens + Assert.assertEquals(5, oddsAndEvens.lookup(false).get(0).size()); // Odds + + oddsAndEvens = rdd.groupBy(isOdd, 1); + Assert.assertEquals(2, oddsAndEvens.count()); + Assert.assertEquals(2, oddsAndEvens.lookup(true).get(0).size()); // Evens + Assert.assertEquals(5, oddsAndEvens.lookup(false).get(0).size()); // Odds + } + + @Test + public void leftOuterJoin() { + JavaPairRDD rdd1 = sc.parallelizePairs(Arrays.asList( + new Tuple2(1, 1), + new Tuple2(1, 2), + new Tuple2(2, 1), + new Tuple2(3, 1) + )); + JavaPairRDD rdd2 = sc.parallelizePairs(Arrays.asList( + new Tuple2(1, 'x'), + new Tuple2(2, 'y'), + new Tuple2(2, 'z'), + new Tuple2(4, 'w') + )); + List>>> joined = + rdd1.leftOuterJoin(rdd2).collect(); + Assert.assertEquals(5, joined.size()); + Tuple2>> firstUnmatched = + rdd1.leftOuterJoin(rdd2).filter(tup -> !tup._2()._2().isPresent()).first(); + Assert.assertEquals(3, firstUnmatched._1().intValue()); + } + + @Test + public void foldReduce() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13)); + Function2 add = (a, b) -> a + b; + + int sum = rdd.fold(0, add); + Assert.assertEquals(33, sum); + + sum = rdd.reduce(add); + Assert.assertEquals(33, sum); + } + + @Test + public void foldByKey() { + List> pairs = Arrays.asList( + new Tuple2(2, 1), + new Tuple2(2, 1), + new Tuple2(1, 1), + new Tuple2(3, 2), + new Tuple2(3, 1) + ); + JavaPairRDD rdd = sc.parallelizePairs(pairs); + JavaPairRDD sums = rdd.foldByKey(0, (a, b) -> a + b); + Assert.assertEquals(1, sums.lookup(1).get(0).intValue()); + Assert.assertEquals(2, sums.lookup(2).get(0).intValue()); + Assert.assertEquals(3, sums.lookup(3).get(0).intValue()); + } + + @Test + public void reduceByKey() { + List> pairs = Arrays.asList( + new Tuple2(2, 1), + new Tuple2(2, 1), + new Tuple2(1, 1), + new Tuple2(3, 2), + new Tuple2(3, 1) + ); + JavaPairRDD rdd = sc.parallelizePairs(pairs); + JavaPairRDD counts = rdd.reduceByKey((a, b) -> a + b); + Assert.assertEquals(1, counts.lookup(1).get(0).intValue()); + Assert.assertEquals(2, counts.lookup(2).get(0).intValue()); + Assert.assertEquals(3, counts.lookup(3).get(0).intValue()); + + Map localCounts = counts.collectAsMap(); + Assert.assertEquals(1, localCounts.get(1).intValue()); + Assert.assertEquals(2, localCounts.get(2).intValue()); + Assert.assertEquals(3, localCounts.get(3).intValue()); + + localCounts = rdd.reduceByKeyLocally((a, b) -> a + b); + Assert.assertEquals(1, localCounts.get(1).intValue()); + Assert.assertEquals(2, localCounts.get(2).intValue()); + Assert.assertEquals(3, localCounts.get(3).intValue()); + } + + @Test + public void map() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); + JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x).cache(); + doubles.collect(); + JavaPairRDD pairs = rdd.mapToPair(x -> new Tuple2(x, x)) + .cache(); + pairs.collect(); + JavaRDD strings = rdd.map(x -> x.toString()).cache(); + strings.collect(); + } + + @Test + public void flatMap() { + JavaRDD rdd = sc.parallelize(Arrays.asList("Hello World!", + "The quick brown fox jumps over the lazy dog.")); + JavaRDD words = rdd.flatMap(x -> Arrays.asList(x.split(" "))); + + Assert.assertEquals("Hello", words.first()); + Assert.assertEquals(11, words.count()); + + JavaPairRDD pairs = rdd.flatMapToPair(s -> { + List> pairs2 = new LinkedList>(); + for (String word : s.split(" ")) pairs2.add(new Tuple2(word, word)); + return pairs2; + }); + + Assert.assertEquals(new Tuple2("Hello", "Hello"), pairs.first()); + Assert.assertEquals(11, pairs.count()); + + JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> { + List lengths = new LinkedList(); + for (String word : s.split(" ")) lengths.add(word.length() * 1.0); + return lengths; + }); + + Double x = doubles.first(); + Assert.assertEquals(5.0, doubles.first().doubleValue(), 0.01); + Assert.assertEquals(11, pairs.count()); + } + + @Test + public void mapsFromPairsToPairs() { + List> pairs = Arrays.asList( + new Tuple2(1, "a"), + new Tuple2(2, "aa"), + new Tuple2(3, "aaa") + ); + JavaPairRDD pairRDD = sc.parallelizePairs(pairs); + + // Regression test for SPARK-668: + JavaPairRDD swapped = + pairRDD.flatMapToPair(x -> Collections.singletonList(x.swap())); + swapped.collect(); + + // There was never a bug here, but it's worth testing: + pairRDD.map(item -> item.swap()).collect(); + } + + @Test + public void mapPartitions() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2); + JavaRDD partitionSums = rdd.mapPartitions(iter -> { + int sum = 0; + while (iter.hasNext()) { + sum += iter.next(); + } + return Collections.singletonList(sum); + }); + + Assert.assertEquals("[3, 7]", partitionSums.collect().toString()); + } + + @Test + public void sequenceFile() { + File tempDir = Files.createTempDir(); + String outputDir = new File(tempDir, "output").getAbsolutePath(); + List> pairs = Arrays.asList( + new Tuple2(1, "a"), + new Tuple2(2, "aa"), + new Tuple2(3, "aaa") + ); + JavaPairRDD rdd = sc.parallelizePairs(pairs); + + rdd.mapToPair( + pair -> new Tuple2(new IntWritable(pair._1()), new Text(pair._2()))) + .saveAsHadoopFile(outputDir, IntWritable.class, Text.class, SequenceFileOutputFormat.class); + + // Try reading the output back as an object file + JavaPairRDD readRDD = sc.sequenceFile(outputDir, IntWritable.class, Text.class) + .mapToPair(pair -> new Tuple2(pair._1().get(), pair._2().toString())); + Assert.assertEquals(pairs, readRDD.collect()); + } + + @Test + public void zip() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); + JavaDoubleRDD doubles = rdd.mapToDouble(x -> 1.0 * x); + JavaPairRDD zipped = rdd.zip(doubles); + zipped.count(); + } + + @Test + public void zipPartitions() { + JavaRDD rdd1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6), 2); + JavaRDD rdd2 = sc.parallelize(Arrays.asList("1", "2", "3", "4"), 2); + FlatMapFunction2, Iterator, Integer> sizesFn = + (Iterator i, Iterator s) -> { + int sizeI = 0; + int sizeS = 0; + while (i.hasNext()) { + sizeI += 1; + i.next(); + } + while (s.hasNext()) { + sizeS += 1; + s.next(); + } + return Arrays.asList(sizeI, sizeS); + }; + JavaRDD sizes = rdd1.zipPartitions(rdd2, sizesFn); + Assert.assertEquals("[3, 2, 3, 2]", sizes.collect().toString()); + } + + @Test + public void accumulators() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); + + final Accumulator intAccum = sc.intAccumulator(10); + rdd.foreach(x -> intAccum.add(x)); + Assert.assertEquals((Integer) 25, intAccum.value()); + + final Accumulator doubleAccum = sc.doubleAccumulator(10.0); + rdd.foreach(x -> doubleAccum.add((double) x)); + Assert.assertEquals((Double) 25.0, doubleAccum.value()); + + // Try a custom accumulator type + AccumulatorParam floatAccumulatorParam = new AccumulatorParam() { + public Float addInPlace(Float r, Float t) { + return r + t; + } + + public Float addAccumulator(Float r, Float t) { + return r + t; + } + + public Float zero(Float initialValue) { + return 0.0f; + } + }; + + final Accumulator floatAccum = sc.accumulator((Float) 10.0f, floatAccumulatorParam); + rdd.foreach(x -> floatAccum.add((float) x)); + Assert.assertEquals((Float) 25.0f, floatAccum.value()); + + // Test the setValue method + floatAccum.setValue(5.0f); + Assert.assertEquals((Float) 5.0f, floatAccum.value()); + } + + @Test + public void keyBy() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2)); + List> s = rdd.keyBy(x -> x.toString()).collect(); + Assert.assertEquals(new Tuple2("1", 1), s.get(0)); + Assert.assertEquals(new Tuple2("2", 2), s.get(1)); + } + + @Test + public void mapOnPairRDD() { + JavaRDD rdd1 = sc.parallelize(Arrays.asList(1, 2, 3, 4)); + JavaPairRDD rdd2 = + rdd1.mapToPair(i -> new Tuple2(i, i % 2)); + JavaPairRDD rdd3 = + rdd2.mapToPair(in -> new Tuple2(in._2(), in._1())); + Assert.assertEquals(Arrays.asList( + new Tuple2(1, 1), + new Tuple2(0, 2), + new Tuple2(1, 3), + new Tuple2(0, 4)), rdd3.collect()); + } + + @Test + public void collectPartitions() { + JavaRDD rdd1 = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7), 3); + + JavaPairRDD rdd2 = + rdd1.mapToPair(i -> new Tuple2(i, i % 2)); + List[] parts = rdd1.collectPartitions(new int[]{0}); + Assert.assertEquals(Arrays.asList(1, 2), parts[0]); + + parts = rdd1.collectPartitions(new int[]{1, 2}); + Assert.assertEquals(Arrays.asList(3, 4), parts[0]); + Assert.assertEquals(Arrays.asList(5, 6, 7), parts[1]); + + Assert.assertEquals(Arrays.asList(new Tuple2(1, 1), + new Tuple2(2, 0)), + rdd2.collectPartitions(new int[]{0})[0]); + + parts = rdd2.collectPartitions(new int[]{1, 2}); + Assert.assertEquals(Arrays.asList(new Tuple2(3, 1), + new Tuple2(4, 0)), parts[0]); + Assert.assertEquals(Arrays.asList(new Tuple2(5, 1), + new Tuple2(6, 0), + new Tuple2(7, 1)), parts[1]); + } + + @Test + public void collectAsMapWithIntArrayValues() { + // Regression test for SPARK-1040 + JavaRDD rdd = sc.parallelize(Arrays.asList(new Integer[]{1})); + JavaPairRDD pairRDD = + rdd.mapToPair(x -> new Tuple2(x, new int[]{x})); + pairRDD.collect(); // Works fine + Map map = pairRDD.collectAsMap(); // Used to crash with ClassCastException + } +} diff --git a/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java b/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java new file mode 100644 index 0000000000000..cd3c925e50fbe --- /dev/null +++ b/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java @@ -0,0 +1,832 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming; + +import com.google.common.base.Optional; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import org.apache.spark.HashPartitioner; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.streaming.api.java.JavaDStream; +import org.apache.spark.streaming.api.java.JavaPairDStream; +import org.junit.Assert; +import org.junit.Test; +import scala.Tuple2; + +import java.io.Serializable; +import java.util.*; + +/** + * Most of these tests replicate org.apache.spark.streaming.JavaAPISuite using java 8 + * lambda syntax. + */ +public class Java8APISuite extends LocalJavaStreamingContext implements Serializable { + + @Test + public void testMap() { + List> inputData = Arrays.asList( + Arrays.asList("hello", "world"), + Arrays.asList("goodnight", "moon")); + + List> expected = Arrays.asList( + Arrays.asList(5, 5), + Arrays.asList(9, 4)); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream letterCount = stream.map(s -> s.length()); + JavaTestUtils.attachTestOutputStream(letterCount); + List> result = JavaTestUtils.runStreams(ssc, 2, 2); + + assertOrderInvariantEquals(expected, result); + } + + @Test + public void testFilter() { + List> inputData = Arrays.asList( + Arrays.asList("giants", "dodgers"), + Arrays.asList("yankees", "red socks")); + + List> expected = Arrays.asList( + Arrays.asList("giants"), + Arrays.asList("yankees")); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream filtered = stream.filter(s -> s.contains("a")); + JavaTestUtils.attachTestOutputStream(filtered); + List> result = JavaTestUtils.runStreams(ssc, 2, 2); + + assertOrderInvariantEquals(expected, result); + } + + @Test + public void testMapPartitions() { + List> inputData = Arrays.asList( + Arrays.asList("giants", "dodgers"), + Arrays.asList("yankees", "red socks")); + + List> expected = Arrays.asList( + Arrays.asList("GIANTSDODGERS"), + Arrays.asList("YANKEESRED SOCKS")); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream mapped = stream.mapPartitions(in -> { + String out = ""; + while (in.hasNext()) { + out = out + in.next().toUpperCase(); + } + return Lists.newArrayList(out); + }); + JavaTestUtils.attachTestOutputStream(mapped); + List> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testReduce() { + List> inputData = Arrays.asList( + Arrays.asList(1, 2, 3), + Arrays.asList(4, 5, 6), + Arrays.asList(7, 8, 9)); + + List> expected = Arrays.asList( + Arrays.asList(6), + Arrays.asList(15), + Arrays.asList(24)); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream reduced = stream.reduce((x, y) -> x + y); + JavaTestUtils.attachTestOutputStream(reduced); + List> result = JavaTestUtils.runStreams(ssc, 3, 3); + + Assert.assertEquals(expected, result); + } + + @Test + public void testReduceByWindow() { + List> inputData = Arrays.asList( + Arrays.asList(1, 2, 3), + Arrays.asList(4, 5, 6), + Arrays.asList(7, 8, 9)); + + List> expected = Arrays.asList( + Arrays.asList(6), + Arrays.asList(21), + Arrays.asList(39), + Arrays.asList(24)); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream reducedWindowed = stream.reduceByWindow((x, y) -> x + y, + (x, y) -> x - y, new Duration(2000), new Duration(1000)); + JavaTestUtils.attachTestOutputStream(reducedWindowed); + List> result = JavaTestUtils.runStreams(ssc, 4, 4); + + Assert.assertEquals(expected, result); + } + + @Test + public void testTransform() { + List> inputData = Arrays.asList( + Arrays.asList(1, 2, 3), + Arrays.asList(4, 5, 6), + Arrays.asList(7, 8, 9)); + + List> expected = Arrays.asList( + Arrays.asList(3, 4, 5), + Arrays.asList(6, 7, 8), + Arrays.asList(9, 10, 11)); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream transformed = stream.transform(in -> in.map(i -> i + 2)); + + JavaTestUtils.attachTestOutputStream(transformed); + List> result = JavaTestUtils.runStreams(ssc, 3, 3); + + assertOrderInvariantEquals(expected, result); + } + + @Test + public void testVariousTransform() { + // tests whether all variations of transform can be called from Java + + List> inputData = Arrays.asList(Arrays.asList(1)); + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + + List>> pairInputData = + Arrays.asList(Arrays.asList(new Tuple2("x", 1))); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream( + JavaTestUtils.attachTestInputStream(ssc, pairInputData, 1)); + + JavaDStream transformed1 = stream.transform(in -> null); + JavaDStream transformed2 = stream.transform((x, time) -> null); + JavaPairDStream transformed3 = stream.transformToPair(x -> null); + JavaPairDStream transformed4 = stream.transformToPair((x, time) -> null); + JavaDStream pairTransformed1 = pairStream.transform(x -> null); + JavaDStream pairTransformed2 = pairStream.transform((x, time) -> null); + JavaPairDStream pairTransformed3 = pairStream.transformToPair(x -> null); + JavaPairDStream pairTransformed4 = + pairStream.transformToPair((x, time) -> null); + + } + + @Test + public void testTransformWith() { + List>> stringStringKVStream1 = Arrays.asList( + Arrays.asList( + new Tuple2("california", "dodgers"), + new Tuple2("new york", "yankees")), + Arrays.asList( + new Tuple2("california", "sharks"), + new Tuple2("new york", "rangers"))); + + List>> stringStringKVStream2 = Arrays.asList( + Arrays.asList( + new Tuple2("california", "giants"), + new Tuple2("new york", "mets")), + Arrays.asList( + new Tuple2("california", "ducks"), + new Tuple2("new york", "islanders"))); + + + List>>> expected = Arrays.asList( + Sets.newHashSet( + new Tuple2>("california", + new Tuple2("dodgers", "giants")), + new Tuple2>("new york", + new Tuple2("yankees", "mets"))), + Sets.newHashSet( + new Tuple2>("california", + new Tuple2("sharks", "ducks")), + new Tuple2>("new york", + new Tuple2("rangers", "islanders")))); + + JavaDStream> stream1 = JavaTestUtils.attachTestInputStream( + ssc, stringStringKVStream1, 1); + JavaPairDStream pairStream1 = JavaPairDStream.fromJavaDStream(stream1); + + JavaDStream> stream2 = JavaTestUtils.attachTestInputStream( + ssc, stringStringKVStream2, 1); + JavaPairDStream pairStream2 = JavaPairDStream.fromJavaDStream(stream2); + + JavaPairDStream> joined = + pairStream1.transformWithToPair(pairStream2,(x, y, z) -> x.join(y)); + + JavaTestUtils.attachTestOutputStream(joined); + List>>> result = JavaTestUtils.runStreams(ssc, 2, 2); + List>>> unorderedResult = Lists.newArrayList(); + for (List>> res : result) { + unorderedResult.add(Sets.newHashSet(res)); + } + + Assert.assertEquals(expected, unorderedResult); + } + + + @Test + public void testVariousTransformWith() { + // tests whether all variations of transformWith can be called from Java + + List> inputData1 = Arrays.asList(Arrays.asList(1)); + List> inputData2 = Arrays.asList(Arrays.asList("x")); + JavaDStream stream1 = JavaTestUtils.attachTestInputStream(ssc, inputData1, 1); + JavaDStream stream2 = JavaTestUtils.attachTestInputStream(ssc, inputData2, 1); + + List>> pairInputData1 = + Arrays.asList(Arrays.asList(new Tuple2("x", 1))); + List>> pairInputData2 = + Arrays.asList(Arrays.asList(new Tuple2(1.0, 'x'))); + JavaPairDStream pairStream1 = JavaPairDStream.fromJavaDStream( + JavaTestUtils.attachTestInputStream(ssc, pairInputData1, 1)); + JavaPairDStream pairStream2 = JavaPairDStream.fromJavaDStream( + JavaTestUtils.attachTestInputStream(ssc, pairInputData2, 1)); + + JavaDStream transformed1 = stream1.transformWith(stream2, (x, y, z) -> null); + JavaDStream transformed2 = stream1.transformWith(pairStream1,(x, y, z) -> null); + + JavaPairDStream transformed3 = + stream1.transformWithToPair(stream2,(x, y, z) -> null); + + JavaPairDStream transformed4 = + stream1.transformWithToPair(pairStream1,(x, y, z) -> null); + + JavaDStream pairTransformed1 = pairStream1.transformWith(stream2,(x, y, z) -> null); + + JavaDStream pairTransformed2_ = + pairStream1.transformWith(pairStream1,(x, y, z) -> null); + + JavaPairDStream pairTransformed3 = + pairStream1.transformWithToPair(stream2,(x, y, z) -> null); + + JavaPairDStream pairTransformed4 = + pairStream1.transformWithToPair(pairStream2,(x, y, z) -> null); + } + + @Test + public void testStreamingContextTransform() { + List> stream1input = Arrays.asList( + Arrays.asList(1), + Arrays.asList(2) + ); + + List> stream2input = Arrays.asList( + Arrays.asList(3), + Arrays.asList(4) + ); + + List>> pairStream1input = Arrays.asList( + Arrays.asList(new Tuple2(1, "x")), + Arrays.asList(new Tuple2(2, "y")) + ); + + List>>> expected = Arrays.asList( + Arrays.asList(new Tuple2>(1, new Tuple2(1, "x"))), + Arrays.asList(new Tuple2>(2, new Tuple2(2, "y"))) + ); + + JavaDStream stream1 = JavaTestUtils.attachTestInputStream(ssc, stream1input, 1); + JavaDStream stream2 = JavaTestUtils.attachTestInputStream(ssc, stream2input, 1); + JavaPairDStream pairStream1 = JavaPairDStream.fromJavaDStream( + JavaTestUtils.attachTestInputStream(ssc, pairStream1input, 1)); + + List> listOfDStreams1 = Arrays.>asList(stream1, stream2); + + // This is just to test whether this transform to JavaStream compiles + JavaDStream transformed1 = ssc.transform( + listOfDStreams1, (List> listOfRDDs, Time time) -> { + assert (listOfRDDs.size() == 2); + return null; + }); + + List> listOfDStreams2 = + Arrays.>asList(stream1, stream2, pairStream1.toJavaDStream()); + + JavaPairDStream> transformed2 = ssc.transformToPair( + listOfDStreams2, (List> listOfRDDs, Time time) -> { + assert (listOfRDDs.size() == 3); + JavaRDD rdd1 = (JavaRDD) listOfRDDs.get(0); + JavaRDD rdd2 = (JavaRDD) listOfRDDs.get(1); + JavaRDD> rdd3 = (JavaRDD>) listOfRDDs.get(2); + JavaPairRDD prdd3 = JavaPairRDD.fromJavaRDD(rdd3); + PairFunction mapToTuple = + (Integer i) -> new Tuple2(i, i); + return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3); + }); + JavaTestUtils.attachTestOutputStream(transformed2); + List>>> result = JavaTestUtils.runStreams(ssc, 2, 2); + Assert.assertEquals(expected, result); + } + + @Test + public void testFlatMap() { + List> inputData = Arrays.asList( + Arrays.asList("go", "giants"), + Arrays.asList("boo", "dodgers"), + Arrays.asList("athletics")); + + List> expected = Arrays.asList( + Arrays.asList("g", "o", "g", "i", "a", "n", "t", "s"), + Arrays.asList("b", "o", "o", "d", "o", "d", "g", "e", "r", "s"), + Arrays.asList("a", "t", "h", "l", "e", "t", "i", "c", "s")); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream flatMapped = stream.flatMap(s -> Lists.newArrayList(s.split("(?!^)"))); + JavaTestUtils.attachTestOutputStream(flatMapped); + List> result = JavaTestUtils.runStreams(ssc, 3, 3); + + assertOrderInvariantEquals(expected, result); + } + + @Test + public void testPairFlatMap() { + List> inputData = Arrays.asList( + Arrays.asList("giants"), + Arrays.asList("dodgers"), + Arrays.asList("athletics")); + + List>> expected = Arrays.asList( + Arrays.asList( + new Tuple2(6, "g"), + new Tuple2(6, "i"), + new Tuple2(6, "a"), + new Tuple2(6, "n"), + new Tuple2(6, "t"), + new Tuple2(6, "s")), + Arrays.asList( + new Tuple2(7, "d"), + new Tuple2(7, "o"), + new Tuple2(7, "d"), + new Tuple2(7, "g"), + new Tuple2(7, "e"), + new Tuple2(7, "r"), + new Tuple2(7, "s")), + Arrays.asList( + new Tuple2(9, "a"), + new Tuple2(9, "t"), + new Tuple2(9, "h"), + new Tuple2(9, "l"), + new Tuple2(9, "e"), + new Tuple2(9, "t"), + new Tuple2(9, "i"), + new Tuple2(9, "c"), + new Tuple2(9, "s"))); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream flatMapped = stream.flatMapToPair(s -> { + List> out = Lists.newArrayList(); + for (String letter : s.split("(?!^)")) { + out.add(new Tuple2(s.length(), letter)); + } + return out; + }); + + JavaTestUtils.attachTestOutputStream(flatMapped); + List>> result = JavaTestUtils.runStreams(ssc, 3, 3); + + Assert.assertEquals(expected, result); + } + + /* + * Performs an order-invariant comparison of lists representing two RDD streams. This allows + * us to account for ordering variation within individual RDD's which occurs during windowing. + */ + public static > void assertOrderInvariantEquals( + List> expected, List> actual) { + for (List list : expected) { + Collections.sort(list); + } + for (List list : actual) { + Collections.sort(list); + } + Assert.assertEquals(expected, actual); + } + + @Test + public void testPairFilter() { + List> inputData = Arrays.asList( + Arrays.asList("giants", "dodgers"), + Arrays.asList("yankees", "red socks")); + + List>> expected = Arrays.asList( + Arrays.asList(new Tuple2("giants", 6)), + Arrays.asList(new Tuple2("yankees", 7))); + + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = + stream.mapToPair(x -> new Tuple2<>(x, x.length())); + JavaPairDStream filtered = pairStream.filter(x -> x._1().contains("a")); + JavaTestUtils.attachTestOutputStream(filtered); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + List>> stringStringKVStream = Arrays.asList( + Arrays.asList(new Tuple2("california", "dodgers"), + new Tuple2("california", "giants"), + new Tuple2("new york", "yankees"), + new Tuple2("new york", "mets")), + Arrays.asList(new Tuple2("california", "sharks"), + new Tuple2("california", "ducks"), + new Tuple2("new york", "rangers"), + new Tuple2("new york", "islanders"))); + + List>> stringIntKVStream = Arrays.asList( + Arrays.asList( + new Tuple2("california", 1), + new Tuple2("california", 3), + new Tuple2("new york", 4), + new Tuple2("new york", 1)), + Arrays.asList( + new Tuple2("california", 5), + new Tuple2("california", 5), + new Tuple2("new york", 3), + new Tuple2("new york", 1))); + + @Test + public void testPairMap() { // Maps pair -> pair of different type + List>> inputData = stringIntKVStream; + + List>> expected = Arrays.asList( + Arrays.asList( + new Tuple2(1, "california"), + new Tuple2(3, "california"), + new Tuple2(4, "new york"), + new Tuple2(1, "new york")), + Arrays.asList( + new Tuple2(5, "california"), + new Tuple2(5, "california"), + new Tuple2(3, "new york"), + new Tuple2(1, "new york"))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + JavaPairDStream reversed = pairStream.mapToPair(x -> x.swap()); + JavaTestUtils.attachTestOutputStream(reversed); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testPairMapPartitions() { // Maps pair -> pair of different type + List>> inputData = stringIntKVStream; + + List>> expected = Arrays.asList( + Arrays.asList( + new Tuple2(1, "california"), + new Tuple2(3, "california"), + new Tuple2(4, "new york"), + new Tuple2(1, "new york")), + Arrays.asList( + new Tuple2(5, "california"), + new Tuple2(5, "california"), + new Tuple2(3, "new york"), + new Tuple2(1, "new york"))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + JavaPairDStream reversed = pairStream.mapPartitionsToPair(in -> { + LinkedList> out = new LinkedList>(); + while (in.hasNext()) { + Tuple2 next = in.next(); + out.add(next.swap()); + } + return out; + }); + + JavaTestUtils.attachTestOutputStream(reversed); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testPairMap2() { // Maps pair -> single + List>> inputData = stringIntKVStream; + + List> expected = Arrays.asList( + Arrays.asList(1, 3, 4, 1), + Arrays.asList(5, 5, 3, 1)); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + JavaDStream reversed = pairStream.map(in -> in._2()); + JavaTestUtils.attachTestOutputStream(reversed); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testPairToPairFlatMapWithChangingTypes() { // Maps pair -> pair + List>> inputData = Arrays.asList( + Arrays.asList( + new Tuple2("hi", 1), + new Tuple2("ho", 2)), + Arrays.asList( + new Tuple2("hi", 1), + new Tuple2("ho", 2))); + + List>> expected = Arrays.asList( + Arrays.asList( + new Tuple2(1, "h"), + new Tuple2(1, "i"), + new Tuple2(2, "h"), + new Tuple2(2, "o")), + Arrays.asList( + new Tuple2(1, "h"), + new Tuple2(1, "i"), + new Tuple2(2, "h"), + new Tuple2(2, "o"))); + + JavaDStream> stream = + JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + JavaPairDStream flatMapped = pairStream.flatMapToPair(in -> { + List> out = new LinkedList>(); + for (Character s : in._1().toCharArray()) { + out.add(new Tuple2(in._2(), s.toString())); + } + return out; + }); + + JavaTestUtils.attachTestOutputStream(flatMapped); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testPairReduceByKey() { + List>> inputData = stringIntKVStream; + + List>> expected = Arrays.asList( + Arrays.asList( + new Tuple2("california", 4), + new Tuple2("new york", 5)), + Arrays.asList( + new Tuple2("california", 10), + new Tuple2("new york", 4))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream( + ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + + JavaPairDStream reduced = pairStream.reduceByKey((x, y) -> x + y); + + JavaTestUtils.attachTestOutputStream(reduced); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testCombineByKey() { + List>> inputData = stringIntKVStream; + + List>> expected = Arrays.asList( + Arrays.asList( + new Tuple2("california", 4), + new Tuple2("new york", 5)), + Arrays.asList( + new Tuple2("california", 10), + new Tuple2("new york", 4))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream( + ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + + JavaPairDStream combined = pairStream.combineByKey(i -> i, + (x, y) -> x + y, (x, y) -> x + y, new HashPartitioner(2)); + + JavaTestUtils.attachTestOutputStream(combined); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testReduceByKeyAndWindow() { + List>> inputData = stringIntKVStream; + + List>> expected = Arrays.asList( + Arrays.asList(new Tuple2("california", 4), + new Tuple2("new york", 5)), + Arrays.asList(new Tuple2("california", 14), + new Tuple2("new york", 9)), + Arrays.asList(new Tuple2("california", 10), + new Tuple2("new york", 4))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + + JavaPairDStream reduceWindowed = + pairStream.reduceByKeyAndWindow((x, y) -> x + y, new Duration(2000), new Duration(1000)); + JavaTestUtils.attachTestOutputStream(reduceWindowed); + List>> result = JavaTestUtils.runStreams(ssc, 3, 3); + + Assert.assertEquals(expected, result); + } + + @Test + public void testUpdateStateByKey() { + List>> inputData = stringIntKVStream; + + List>> expected = Arrays.asList( + Arrays.asList(new Tuple2("california", 4), + new Tuple2("new york", 5)), + Arrays.asList(new Tuple2("california", 14), + new Tuple2("new york", 9)), + Arrays.asList(new Tuple2("california", 14), + new Tuple2("new york", 9))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + + JavaPairDStream updated = pairStream.updateStateByKey((values, state) -> { + int out = 0; + if (state.isPresent()) { + out = out + state.get(); + } + for (Integer v : values) { + out = out + v; + } + return Optional.of(out); + }); + + JavaTestUtils.attachTestOutputStream(updated); + List>> result = JavaTestUtils.runStreams(ssc, 3, 3); + + Assert.assertEquals(expected, result); + } + + @Test + public void testReduceByKeyAndWindowWithInverse() { + List>> inputData = stringIntKVStream; + + List>> expected = Arrays.asList( + Arrays.asList(new Tuple2("california", 4), + new Tuple2("new york", 5)), + Arrays.asList(new Tuple2("california", 14), + new Tuple2("new york", 9)), + Arrays.asList(new Tuple2("california", 10), + new Tuple2("new york", 4))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + + JavaPairDStream reduceWindowed = + pairStream.reduceByKeyAndWindow((x, y) -> x + y, (x, y) -> x - y, new Duration(2000), new Duration(1000)); + JavaTestUtils.attachTestOutputStream(reduceWindowed); + List>> result = JavaTestUtils.runStreams(ssc, 3, 3); + + Assert.assertEquals(expected, result); + } + + @Test + public void testPairTransform() { + List>> inputData = Arrays.asList( + Arrays.asList( + new Tuple2(3, 5), + new Tuple2(1, 5), + new Tuple2(4, 5), + new Tuple2(2, 5)), + Arrays.asList( + new Tuple2(2, 5), + new Tuple2(3, 5), + new Tuple2(4, 5), + new Tuple2(1, 5))); + + List>> expected = Arrays.asList( + Arrays.asList( + new Tuple2(1, 5), + new Tuple2(2, 5), + new Tuple2(3, 5), + new Tuple2(4, 5)), + Arrays.asList( + new Tuple2(1, 5), + new Tuple2(2, 5), + new Tuple2(3, 5), + new Tuple2(4, 5))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream( + ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + + JavaPairDStream sorted = pairStream.transformToPair(in -> in.sortByKey()); + + JavaTestUtils.attachTestOutputStream(sorted); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testPairToNormalRDDTransform() { + List>> inputData = Arrays.asList( + Arrays.asList( + new Tuple2(3, 5), + new Tuple2(1, 5), + new Tuple2(4, 5), + new Tuple2(2, 5)), + Arrays.asList( + new Tuple2(2, 5), + new Tuple2(3, 5), + new Tuple2(4, 5), + new Tuple2(1, 5))); + + List> expected = Arrays.asList( + Arrays.asList(3, 1, 4, 2), + Arrays.asList(2, 3, 4, 1)); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream( + ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + JavaDStream firstParts = pairStream.transform(in -> in.map(x -> x._1())); + JavaTestUtils.attachTestOutputStream(firstParts); + List> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testMapValues() { + List>> inputData = stringStringKVStream; + + List>> expected = Arrays.asList( + Arrays.asList(new Tuple2("california", "DODGERS"), + new Tuple2("california", "GIANTS"), + new Tuple2("new york", "YANKEES"), + new Tuple2("new york", "METS")), + Arrays.asList(new Tuple2("california", "SHARKS"), + new Tuple2("california", "DUCKS"), + new Tuple2("new york", "RANGERS"), + new Tuple2("new york", "ISLANDERS"))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream( + ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + + JavaPairDStream mapped = pairStream.mapValues(s -> s.toUpperCase()); + JavaTestUtils.attachTestOutputStream(mapped); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + + Assert.assertEquals(expected, result); + } + + @Test + public void testFlatMapValues() { + List>> inputData = stringStringKVStream; + + List>> expected = Arrays.asList( + Arrays.asList(new Tuple2("california", "dodgers1"), + new Tuple2("california", "dodgers2"), + new Tuple2("california", "giants1"), + new Tuple2("california", "giants2"), + new Tuple2("new york", "yankees1"), + new Tuple2("new york", "yankees2"), + new Tuple2("new york", "mets1"), + new Tuple2("new york", "mets2")), + Arrays.asList(new Tuple2("california", "sharks1"), + new Tuple2("california", "sharks2"), + new Tuple2("california", "ducks1"), + new Tuple2("california", "ducks2"), + new Tuple2("new york", "rangers1"), + new Tuple2("new york", "rangers2"), + new Tuple2("new york", "islanders1"), + new Tuple2("new york", "islanders2"))); + + JavaDStream> stream = JavaTestUtils.attachTestInputStream( + ssc, inputData, 1); + JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); + + + JavaPairDStream flatMapped = pairStream.flatMapValues(in -> { + List out = new ArrayList(); + out.add(in + "1"); + out.add(in + "2"); + return out; + }); + JavaTestUtils.attachTestOutputStream(flatMapped); + List>> result = JavaTestUtils.runStreams(ssc, 2, 2); + Assert.assertEquals(expected, result); + } + +} diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index d45f6773fad78..092bd0c62eae7 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -90,6 +90,13 @@ object SparkBuild extends Build { } lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client" val maybeAvro = if (hadoopVersion.startsWith("0.23.") && isYarnEnabled) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq() + + lazy val javaVersion = System.getProperty("java.specification.version") + lazy val isJava8Enabled = if (javaVersion == "1.8") true else false + lazy val java8Tests = Project("java8-tests", file("java8-tests"), settings = java8TestsSettings) dependsOn(core % "compile->compile;test->test") dependsOn(streaming % "compile->compile;test->test") + // Conditionally include the yarn sub-project + val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]() + // Conditionally include the yarn sub-project lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core) lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core) @@ -118,10 +125,10 @@ object SparkBuild extends Build { lazy val examples = Project("examples", file("examples"), settings = examplesSettings) .dependsOn(core, mllib, graphx, bagel, streaming, externalTwitter) dependsOn(allExternal: _*) - // Everything except assembly, tools and examples belong to packageProjects + // Everything except assembly, tools, java8Tests and examples belong to packageProjects lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef - lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) + lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.apache.spark", @@ -370,6 +377,12 @@ object SparkBuild extends Build { name := "spark-yarn" ) + def java8TestsSettings = sharedSettings ++ Seq( + name := "java8-tests", + javacOptions := Seq("-target", "1.8", "-source", "1.8"), + testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a") + ) + // Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN). def extraYarnSettings = if(isYarnEnabled) yarnEnabledSettings else Seq() diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala index e23b725052864..721d50273259e 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala @@ -41,7 +41,7 @@ class JavaDStream[T](val dstream: DStream[T])(implicit val classTag: ClassTag[T] /** Return a new DStream containing only the elements that satisfy a predicate. */ def filter(f: JFunction[T, java.lang.Boolean]): JavaDStream[T] = - dstream.filter((x => f(x).booleanValue())) + dstream.filter((x => f.call(x).booleanValue())) /** Persist RDDs of this DStream with the default storage level (MEMORY_ONLY_SER) */ def cache(): JavaDStream[T] = dstream.cache() diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala index 7aa7ead29b469..ccdd1ac6794f8 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala @@ -17,19 +17,20 @@ package org.apache.spark.streaming.api.java -import java.util.{List => JList} +import java.util import java.lang.{Long => JLong} +import java.util.{List => JList} import scala.collection.JavaConversions._ import scala.reflect.ClassTag -import org.apache.spark.streaming._ -import org.apache.spark.api.java.{JavaPairRDD, JavaRDDLike, JavaRDD} -import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2} -import org.apache.spark.api.java.function.{Function3 => JFunction3, _} -import java.util +import org.apache.spark.api.java.{JavaPairRDD, JavaRDD, JavaRDDLike} +import org.apache.spark.api.java.JavaPairRDD._ +import org.apache.spark.api.java.JavaSparkContext.fakeClassTag +import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2, Function3 => JFunction3, _} import org.apache.spark.rdd.RDD -import JavaDStream._ +import org.apache.spark.streaming._ +import org.apache.spark.streaming.api.java.JavaDStream._ import org.apache.spark.streaming.dstream.DStream trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T, R]] @@ -123,23 +124,23 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * this DStream. Applying glom() to an RDD coalesces all elements within each partition into * an array. */ - def glom(): JavaDStream[JList[T]] = { + def glom(): JavaDStream[JList[T]] = new JavaDStream(dstream.glom().map(x => new java.util.ArrayList[T](x.toSeq))) - } + /** Return the [[org.apache.spark.streaming.StreamingContext]] associated with this DStream */ - def context(): StreamingContext = dstream.context() + def context(): StreamingContext = dstream.context /** Return a new DStream by applying a function to all elements of this DStream. */ def map[R](f: JFunction[T, R]): JavaDStream[R] = { - new JavaDStream(dstream.map(f)(f.returnType()))(f.returnType()) + new JavaDStream(dstream.map(f)(fakeClassTag))(fakeClassTag) } /** Return a new DStream by applying a function to all elements of this DStream. */ - def map[K2, V2](f: PairFunction[T, K2, V2]): JavaPairDStream[K2, V2] = { + def mapToPair[K2, V2](f: PairFunction[T, K2, V2]): JavaPairDStream[K2, V2] = { def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] - new JavaPairDStream(dstream.map(f)(cm))(f.keyType(), f.valueType()) + new JavaPairDStream(dstream.map[(K2, V2)](f)(cm))(fakeClassTag[K2], fakeClassTag[V2]) } /** @@ -148,19 +149,19 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T */ def flatMap[U](f: FlatMapFunction[T, U]): JavaDStream[U] = { import scala.collection.JavaConverters._ - def fn = (x: T) => f.apply(x).asScala - new JavaDStream(dstream.flatMap(fn)(f.elementType()))(f.elementType()) + def fn = (x: T) => f.call(x).asScala + new JavaDStream(dstream.flatMap(fn)(fakeClassTag[U]))(fakeClassTag[U]) } /** * Return a new DStream by applying a function to all elements of this DStream, * and then flattening the results */ - def flatMap[K2, V2](f: PairFlatMapFunction[T, K2, V2]): JavaPairDStream[K2, V2] = { + def flatMapToPair[K2, V2](f: PairFlatMapFunction[T, K2, V2]): JavaPairDStream[K2, V2] = { import scala.collection.JavaConverters._ - def fn = (x: T) => f.apply(x).asScala + def fn = (x: T) => f.call(x).asScala def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] - new JavaPairDStream(dstream.flatMap(fn)(cm))(f.keyType(), f.valueType()) + new JavaPairDStream(dstream.flatMap(fn)(cm))(fakeClassTag[K2], fakeClassTag[V2]) } /** @@ -169,8 +170,8 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * of the RDD. */ def mapPartitions[U](f: FlatMapFunction[java.util.Iterator[T], U]): JavaDStream[U] = { - def fn = (x: Iterator[T]) => asScalaIterator(f.apply(asJavaIterator(x)).iterator()) - new JavaDStream(dstream.mapPartitions(fn)(f.elementType()))(f.elementType()) + def fn = (x: Iterator[T]) => asScalaIterator(f.call(asJavaIterator(x)).iterator()) + new JavaDStream(dstream.mapPartitions(fn)(fakeClassTag[U]))(fakeClassTag[U]) } /** @@ -178,10 +179,10 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * of this DStream. Applying mapPartitions() to an RDD applies a function to each partition * of the RDD. */ - def mapPartitions[K2, V2](f: PairFlatMapFunction[java.util.Iterator[T], K2, V2]) + def mapPartitionsToPair[K2, V2](f: PairFlatMapFunction[java.util.Iterator[T], K2, V2]) : JavaPairDStream[K2, V2] = { - def fn = (x: Iterator[T]) => asScalaIterator(f.apply(asJavaIterator(x)).iterator()) - new JavaPairDStream(dstream.mapPartitions(fn))(f.keyType(), f.valueType()) + def fn = (x: Iterator[T]) => asScalaIterator(f.call(asJavaIterator(x)).iterator()) + new JavaPairDStream(dstream.mapPartitions(fn))(fakeClassTag[K2], fakeClassTag[V2]) } /** @@ -306,7 +307,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * Return a new DStream in which each RDD is generated by applying a function * on each RDD of 'this' DStream. */ - def transform[K2, V2](transformFunc: JFunction[R, JavaPairRDD[K2, V2]]): + def transformToPair[K2, V2](transformFunc: JFunction[R, JavaPairRDD[K2, V2]]): JavaPairDStream[K2, V2] = { implicit val cmk: ClassTag[K2] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K2]] @@ -321,7 +322,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * Return a new DStream in which each RDD is generated by applying a function * on each RDD of 'this' DStream. */ - def transform[K2, V2](transformFunc: JFunction2[R, Time, JavaPairRDD[K2, V2]]): + def transformToPair[K2, V2](transformFunc: JFunction2[R, Time, JavaPairRDD[K2, V2]]): JavaPairDStream[K2, V2] = { implicit val cmk: ClassTag[K2] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K2]] @@ -353,7 +354,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * Return a new DStream in which each RDD is generated by applying a function * on each RDD of 'this' DStream and 'other' DStream. */ - def transformWith[U, K2, V2]( + def transformWithToPair[U, K2, V2]( other: JavaDStream[U], transformFunc: JFunction3[R, JavaRDD[U], Time, JavaPairRDD[K2, V2]] ): JavaPairDStream[K2, V2] = { @@ -391,7 +392,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * Return a new DStream in which each RDD is generated by applying a function * on each RDD of 'this' DStream and 'other' DStream. */ - def transformWith[K2, V2, K3, V3]( + def transformWithToPair[K2, V2, K3, V3]( other: JavaPairDStream[K2, V2], transformFunc: JFunction3[R, JavaPairRDD[K2, V2], Time, JavaPairRDD[K3, V3]] ): JavaPairDStream[K3, V3] = { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index 2c7ff87744d7a..14c846066906d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -25,12 +25,13 @@ import scala.reflect.ClassTag import org.apache.spark.streaming._ import org.apache.spark.streaming.StreamingContext._ -import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2, Function3 => JFunction3} +import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2} import org.apache.spark.Partitioner import org.apache.hadoop.mapred.{JobConf, OutputFormat} import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.hadoop.conf.Configuration -import org.apache.spark.api.java.{JavaUtils, JavaRDD, JavaPairRDD} +import org.apache.spark.api.java.{JavaUtils, JavaPairRDD} +import org.apache.spark.api.java.JavaPairRDD._ import org.apache.spark.storage.StorageLevel import com.google.common.base.Optional import org.apache.spark.rdd.RDD @@ -54,7 +55,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( /** Return a new DStream containing only the elements that satisfy a predicate. */ def filter(f: JFunction[(K, V), java.lang.Boolean]): JavaPairDStream[K, V] = - dstream.filter((x => f(x).booleanValue())) + dstream.filter((x => f.call(x).booleanValue())) /** Persist RDDs of this DStream with the default storage level (MEMORY_ONLY_SER) */ def cache(): JavaPairDStream[K, V] = dstream.cache() @@ -279,7 +280,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * DStream's batching interval */ def reduceByKeyAndWindow( - reduceFunc: Function2[V, V, V], + reduceFunc: JFunction2[V, V, V], windowDuration: Duration, slideDuration: Duration ):JavaPairDStream[K, V] = { @@ -299,7 +300,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * @param numPartitions Number of partitions of each RDD in the new DStream. */ def reduceByKeyAndWindow( - reduceFunc: Function2[V, V, V], + reduceFunc: JFunction2[V, V, V], windowDuration: Duration, slideDuration: Duration, numPartitions: Int @@ -320,7 +321,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * DStream. */ def reduceByKeyAndWindow( - reduceFunc: Function2[V, V, V], + reduceFunc: JFunction2[V, V, V], windowDuration: Duration, slideDuration: Duration, partitioner: Partitioner @@ -345,8 +346,8 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * DStream's batching interval */ def reduceByKeyAndWindow( - reduceFunc: Function2[V, V, V], - invReduceFunc: Function2[V, V, V], + reduceFunc: JFunction2[V, V, V], + invReduceFunc: JFunction2[V, V, V], windowDuration: Duration, slideDuration: Duration ): JavaPairDStream[K, V] = { @@ -374,8 +375,8 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * set this to null if you do not want to filter */ def reduceByKeyAndWindow( - reduceFunc: Function2[V, V, V], - invReduceFunc: Function2[V, V, V], + reduceFunc: JFunction2[V, V, V], + invReduceFunc: JFunction2[V, V, V], windowDuration: Duration, slideDuration: Duration, numPartitions: Int, @@ -412,8 +413,8 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * set this to null if you do not want to filter */ def reduceByKeyAndWindow( - reduceFunc: Function2[V, V, V], - invReduceFunc: Function2[V, V, V], + reduceFunc: JFunction2[V, V, V], + invReduceFunc: JFunction2[V, V, V], windowDuration: Duration, slideDuration: Duration, partitioner: Partitioner, diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index b082bb058529b..c48d754e439e9 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -187,7 +187,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { converter: JFunction[InputStream, java.lang.Iterable[T]], storageLevel: StorageLevel) : JavaDStream[T] = { - def fn = (x: InputStream) => converter.apply(x).toIterator + def fn = (x: InputStream) => converter.call(x).toIterator implicit val cmt: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]] ssc.socketStream(hostname, port, fn, storageLevel) @@ -431,7 +431,7 @@ class JavaStreamingContext(val ssc: StreamingContext) { * In the transform function, convert the JavaRDD corresponding to that JavaDStream to * a JavaPairRDD using org.apache.spark.api.java.JavaPairRDD.fromJavaRDD(). */ - def transform[K, V]( + def transformToPair[K, V]( dstreams: JList[JavaDStream[_]], transformFunc: JFunction2[JList[JavaRDD[_]], Time, JavaPairRDD[K, V]] ): JavaPairDStream[K, V] = { diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java index 54a0791d04ea4..d04be0617bbe7 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java @@ -247,14 +247,14 @@ public Iterable call(Iterator in) { Assert.assertEquals(expected, result); } - private class IntegerSum extends Function2 { + private class IntegerSum implements Function2 { @Override public Integer call(Integer i1, Integer i2) throws Exception { return i1 + i2; } } - private class IntegerDifference extends Function2 { + private class IntegerDifference implements Function2 { @Override public Integer call(Integer i1, Integer i2) throws Exception { return i1 - i2; @@ -392,7 +392,7 @@ public JavaRDD call(JavaRDD in) throws Exception { } ); - JavaPairDStream transformed3 = stream.transform( + JavaPairDStream transformed3 = stream.transformToPair( new Function, JavaPairRDD>() { @Override public JavaPairRDD call(JavaRDD in) throws Exception { return null; @@ -400,7 +400,7 @@ public JavaRDD call(JavaRDD in) throws Exception { } ); - JavaPairDStream transformed4 = stream.transform( + JavaPairDStream transformed4 = stream.transformToPair( new Function2, Time, JavaPairRDD>() { @Override public JavaPairRDD call(JavaRDD in, Time time) throws Exception { return null; @@ -424,7 +424,7 @@ public JavaRDD call(JavaRDD in) throws Exception { } ); - JavaPairDStream pairTransformed3 = pairStream.transform( + JavaPairDStream pairTransformed3 = pairStream.transformToPair( new Function, JavaPairRDD>() { @Override public JavaPairRDD call(JavaPairRDD in) throws Exception { return null; @@ -432,7 +432,7 @@ public JavaRDD call(JavaRDD in) throws Exception { } ); - JavaPairDStream pairTransformed4 = pairStream.transform( + JavaPairDStream pairTransformed4 = pairStream.transformToPair( new Function2, Time, JavaPairRDD>() { @Override public JavaPairRDD call(JavaPairRDD in, Time time) throws Exception { return null; @@ -482,7 +482,7 @@ public void testTransformWith() { ssc, stringStringKVStream2, 1); JavaPairDStream pairStream2 = JavaPairDStream.fromJavaDStream(stream2); - JavaPairDStream> joined = pairStream1.transformWith( + JavaPairDStream> joined = pairStream1.transformWithToPair( pairStream2, new Function3< JavaPairRDD, @@ -551,7 +551,7 @@ public JavaRDD call(JavaRDD rdd1, JavaPairRDD } ); - JavaPairDStream transformed3 = stream1.transformWith( + JavaPairDStream transformed3 = stream1.transformWithToPair( stream2, new Function3, JavaRDD, Time, JavaPairRDD>() { @Override @@ -561,7 +561,7 @@ public JavaPairRDD call(JavaRDD rdd1, JavaRDD r } ); - JavaPairDStream transformed4 = stream1.transformWith( + JavaPairDStream transformed4 = stream1.transformWithToPair( pairStream1, new Function3, JavaPairRDD, Time, JavaPairRDD>() { @Override @@ -591,7 +591,7 @@ public JavaRDD call(JavaPairRDD rdd1, JavaPairRDD pairTransformed3 = pairStream1.transformWith( + JavaPairDStream pairTransformed3 = pairStream1.transformWithToPair( stream2, new Function3, JavaRDD, Time, JavaPairRDD>() { @Override @@ -601,7 +601,7 @@ public JavaPairRDD call(JavaPairRDD rdd1, JavaR } ); - JavaPairDStream pairTransformed4 = pairStream1.transformWith( + JavaPairDStream pairTransformed4 = pairStream1.transformWithToPair( pairStream2, new Function3, JavaPairRDD, Time, JavaPairRDD>() { @Override @@ -656,7 +656,7 @@ public JavaRDD call(List> listOfRDDs, Time time) { List> listOfDStreams2 = Arrays.>asList(stream1, stream2, pairStream1.toJavaDStream()); - JavaPairDStream> transformed2 = ssc.transform( + JavaPairDStream> transformed2 = ssc.transformToPair( listOfDStreams2, new Function2>, Time, JavaPairRDD>>() { public JavaPairRDD> call(List> listOfRDDs, Time time) { @@ -671,7 +671,7 @@ public Tuple2 call(Integer i) throws Exception { return new Tuple2(i, i); } }; - return rdd1.union(rdd2).map(mapToTuple).join(prdd3); + return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3); } } ); @@ -742,17 +742,17 @@ public void testPairFlatMap() { new Tuple2(9, "s"))); JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); - JavaPairDStream flatMapped = stream.flatMap( - new PairFlatMapFunction() { - @Override - public Iterable> call(String in) throws Exception { - List> out = Lists.newArrayList(); - for (String letter: in.split("(?!^)")) { - out.add(new Tuple2(in.length(), letter)); - } - return out; + JavaPairDStream flatMapped = stream.flatMapToPair( + new PairFlatMapFunction() { + @Override + public Iterable> call(String in) throws Exception { + List> out = Lists.newArrayList(); + for (String letter: in.split("(?!^)")) { + out.add(new Tuple2(in.length(), letter)); } - }); + return out; + } + }); JavaTestUtils.attachTestOutputStream(flatMapped); List>> result = JavaTestUtils.runStreams(ssc, 3, 3); @@ -816,7 +816,7 @@ public void testPairFilter() { Arrays.asList(new Tuple2("yankees", 7))); JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); - JavaPairDStream pairStream = stream.map( + JavaPairDStream pairStream = stream.mapToPair( new PairFunction() { @Override public Tuple2 call(String in) throws Exception { @@ -880,7 +880,7 @@ public void testPairMap() { // Maps pair -> pair of different type JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); - JavaPairDStream reversed = pairStream.map( + JavaPairDStream reversed = pairStream.mapToPair( new PairFunction, Integer, String>() { @Override public Tuple2 call(Tuple2 in) throws Exception { @@ -913,7 +913,7 @@ public void testPairMapPartitions() { // Maps pair -> pair of different type JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); - JavaPairDStream reversed = pairStream.mapPartitions( + JavaPairDStream reversed = pairStream.mapPartitionsToPair( new PairFlatMapFunction>, Integer, String>() { @Override public Iterable> call(Iterator> in) throws Exception { @@ -983,7 +983,7 @@ public void testPairToPairFlatMapWithChangingTypes() { // Maps pair -> pair JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); - JavaPairDStream flatMapped = pairStream.flatMap( + JavaPairDStream flatMapped = pairStream.flatMapToPair( new PairFlatMapFunction, Integer, String>() { @Override public Iterable> call(Tuple2 in) throws Exception { @@ -1300,7 +1300,7 @@ public void testPairTransform() { ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); - JavaPairDStream sorted = pairStream.transform( + JavaPairDStream sorted = pairStream.transformToPair( new Function, JavaPairRDD>() { @Override public JavaPairRDD call(JavaPairRDD in) throws Exception { @@ -1632,7 +1632,8 @@ public void testSocketTextStream() { @Test public void testSocketString() { - class Converter extends Function> { + + class Converter implements Function> { public Iterable call(InputStream in) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(in)); List out = new ArrayList(); From 4ab87d3551f0b74e4fb6da611a5baea7aba93c6c Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 25 Feb 2014 11:02:15 +0530 Subject: [PATCH 2/9] Review feedback on the pr --- .../java/function/DoubleFlatMapFunction.java | 3 ++ .../api/java/function/DoubleFunction.java | 5 ++- .../api/java/function/FlatMapFunction.java | 3 ++ .../api/java/function/FlatMapFunction2.java | 3 ++ .../spark/api/java/function/Function.java | 5 +++ .../spark/api/java/function/Function2.java | 3 ++ .../spark/api/java/function/Function3.java | 3 ++ .../java/function/PairFlatMapFunction.java | 7 +++- .../spark/api/java/function/PairFunction.java | 8 +++- .../spark/api/java/function/VoidFunction.java | 3 ++ .../apache/spark/api/java/JavaPairRDD.scala | 15 +++++--- .../apache/spark/api/java/JavaRDDLike.scala | 4 +- .../java/org/apache/spark/JavaAPISuite.java | 4 +- docs/java-programming-guide.md | 22 +++++++++-- .../java/org/apache/spark/Java8APISuite.java | 37 ++++++++++--------- .../apache/spark/streaming/Java8APISuite.java | 35 +++++++++++------- project/SparkBuild.scala | 9 +++-- .../apache/spark/streaming/JavaAPISuite.java | 17 ++++----- 18 files changed, 124 insertions(+), 62 deletions(-) diff --git a/core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java b/core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java index 32f4b50c62a38..57fd0a7a80494 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java @@ -19,6 +19,9 @@ import java.io.Serializable; +/** + * A function that returns zero or more records of type Double from each input record. + */ public interface DoubleFlatMapFunction extends Serializable { public Iterable call(T t) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java b/core/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java index 22cce23b0c9ea..150144e0e418c 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java @@ -19,6 +19,9 @@ import java.io.Serializable; +/** + * A function that returns Doubles, and can be used to construct DoubleRDDs. + */ public interface DoubleFunction extends Serializable { - public Double call(T t) throws Exception; + public double call(T t) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java b/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java index 5f51279ca7571..fa75842047c6a 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java @@ -19,6 +19,9 @@ import java.io.Serializable; +/** + * A function that returns zero or more output records from each input record. + */ public interface FlatMapFunction extends Serializable { public Iterable call(T t) throws Exception; } \ No newline at end of file diff --git a/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java b/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java index 5bb9361ba85aa..d1fdec072443d 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java +++ b/core/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java @@ -19,6 +19,9 @@ import java.io.Serializable; +/** + * A function that takes two inputs and returns zero or more output records. + */ public interface FlatMapFunction2 extends Serializable { public Iterable call(T1 t1, T2 t2) throws Exception; } \ No newline at end of file diff --git a/core/src/main/java/org/apache/spark/api/java/function/Function.java b/core/src/main/java/org/apache/spark/api/java/function/Function.java index e4e8e641dcd7a..d00551bb0add6 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/Function.java +++ b/core/src/main/java/org/apache/spark/api/java/function/Function.java @@ -19,6 +19,11 @@ import java.io.Serializable; +/** + * Base interface for functions whose return types do not create special RDDs. PairFunction and + * DoubleFunction are handled separately, to allow PairRDDs and DoubleRDDs to be constructed + * when mapping RDDs of other types. + */ public interface Function extends Serializable { public R call(T1 v1) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/Function2.java b/core/src/main/java/org/apache/spark/api/java/function/Function2.java index d24e9e3a563f7..793caaa61ac5a 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/Function2.java +++ b/core/src/main/java/org/apache/spark/api/java/function/Function2.java @@ -19,6 +19,9 @@ import java.io.Serializable; +/** + * A two-argument function that takes arguments of type T1 and T2 and returns an R. + */ public interface Function2 extends Serializable { public R call(T1 v1, T2 v2) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/Function3.java b/core/src/main/java/org/apache/spark/api/java/function/Function3.java index 73e388b0e3825..b4151c3417df4 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/Function3.java +++ b/core/src/main/java/org/apache/spark/api/java/function/Function3.java @@ -19,6 +19,9 @@ import java.io.Serializable; +/** + * A three-argument function that takes arguments of type T1, T2 and T3 and returns an R. + */ public interface Function3 extends Serializable { public R call(T1 v1, T2 v2, T3 v3) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java b/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java index 61793709d1608..c5c65d40bc2fb 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java @@ -21,6 +21,11 @@ import java.io.Serializable; + +/** + * A function that returns zero or more key-value pair records from each input record. The + * key-value pairs are represented as scala.Tuple2 objects. + */ public interface PairFlatMapFunction extends Serializable { - public Iterable> call(T t) throws Exception; + public Iterable> call(T t) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java b/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java index 707cd4067ed58..d32cf30c624b4 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java @@ -17,9 +17,13 @@ package org.apache.spark.api.java.function; -import java.io.Serializable; import scala.Tuple2; +import java.io.Serializable; + +/** + * A function that returns key-value pairs (Tuple2), and can be used to construct PairRDDs. + */ public interface PairFunction extends Serializable { - public Tuple2 call(T t) throws Exception; + public Tuple2 call(T t) throws Exception; } diff --git a/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java b/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java index dc5ddd5e90410..2a10435b7523a 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/VoidFunction.java @@ -19,6 +19,9 @@ import java.io.Serializable; +/** + * A function with no return value. + */ public interface VoidFunction extends Serializable { public void call(T t) throws Exception; } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index d791f8f5f7485..857626fe84af9 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -699,14 +699,17 @@ object JavaPairRDD { } implicit def toRDD[K, V](rdd: JavaPairRDD[K, V]): RDD[(K, V)] = rdd.rdd + private[spark] - implicit def toJFunction2[T1, T2, R](fun: JFunction2[T1, T2, R]) - : Function2[T1, T2, R] = (x: T1, x1: T2) => fun.call(x, x1) - private[spark] - implicit def toJFunction[T, R](fun: JFunction[T, R]): T => R = (x) => fun.call(x) + implicit def toScalaFunction2[T1, T2, R](fun: JFunction2[T1, T2, R]): Function2[T1, T2, R] = { + (x: T1, x1: T2) => fun.call(x, x1) + } + + private[spark] implicit def toScalaFunction[T, R](fun: JFunction[T, R]): T => R = x => fun.call(x) + private[spark] - implicit def pairFunToScalaFun[A, B, C](x: PairFunction[A, B, C]): A => (B, C) = - (y: A) => x.call(y) + implicit def pairFunToScalaFun[A, B, C](x: PairFunction[A, B, C]): A => (B, C) = y => x.call(y) + /** Convert a JavaRDD of key-value pairs to JavaPairRDD. */ def fromJavaRDD[K, V](rdd: JavaRDD[(K, V)]): JavaPairRDD[K, V] = { implicit val ctagK: ClassTag[K] = fakeClassTag diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index e8727774739d5..af0114bee3f49 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -90,7 +90,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return a new RDD by applying a function to all elements of this RDD. */ def mapToPair[K2, V2](f: PairFunction[T, K2, V2]): JavaPairRDD[K2, V2] = { - def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] + def cm = implicitly[ClassTag[(K2, V2)]] new JavaPairRDD(rdd.map[(K2, V2)](f)(cm))(fakeClassTag[K2], fakeClassTag[V2]) } @@ -121,7 +121,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def flatMapToPair[K2, V2](f: PairFlatMapFunction[T, K2, V2]): JavaPairRDD[K2, V2] = { import scala.collection.JavaConverters._ def fn = (x: T) => f.call(x).asScala - def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] + def cm = implicitly[ClassTag[(K2, V2)]] JavaPairRDD.fromRDD(rdd.flatMap(fn)(cm))(fakeClassTag[K2], fakeClassTag[V2]) } diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index 881cd055d9712..c7d0e2d577726 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -388,7 +388,7 @@ public void map() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); JavaDoubleRDD doubles = rdd.mapToDouble(new DoubleFunction() { @Override - public Double call(Integer x) { + public double call(Integer x) { return 1.0 * x; } }).cache(); @@ -768,7 +768,7 @@ public void zip() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); JavaDoubleRDD doubles = rdd.mapToDouble(new DoubleFunction() { @Override - public Double call(Integer x) { + public double call(Integer x) { return 1.0 * x; } }); diff --git a/docs/java-programming-guide.md b/docs/java-programming-guide.md index 5c73dbb25ede8..469abe45e33e5 100644 --- a/docs/java-programming-guide.md +++ b/docs/java-programming-guide.md @@ -30,6 +30,12 @@ There are a few key differences between the Java and Scala APIs: classes for key-value pairs and doubles. For example, [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD) stores key-value pairs. +* To support upcoming java 8 lambda expression, methods are defined on the basis of + the passed anonymous function's (a.k.a lambda expression) return type, + for example mapToPair(...) or flatMapToPair returns + [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD), + similarly mapToDouble and flatMapToDouble returns + [`JavaDoubleRDD`](api/core/index.html#org.apache.spark.api.java.JavaDoubleRDD). * RDD methods like `collect()` and `countByKey()` return Java collections types, such as `java.util.List` and `java.util.Map`. * Key-value pairs, which are simply written as `(key, value)` in Scala, are represented @@ -78,7 +84,6 @@ RDD [storage level](scala-programming-guide.html#rdd-persistence) constants, suc declared in the [org.apache.spark.api.java.StorageLevels](api/core/index.html#org.apache.spark.api.java.StorageLevels) class. To define your own storage level, you can use StorageLevels.create(...). - # Other Features The Java API supports other Spark features, including @@ -127,11 +132,20 @@ class Split extends FlatMapFunction { JavaRDD words = lines.flatMap(new Split()); {% endhighlight %} +Java 8+ users can also possibly write the above `FlatMapFunction` in a more concise way using +lambda expression as follows: + +{% highlight java %} +JavaRDD words = lines.flatMap(s -> Arrays.asList(s.split(" "))); +{% endhighlight %} + +Same possibility applies to all passed in anonymous classes in java 8. + Continuing with the word count example, we map each word to a `(word, 1)` pair: {% highlight java %} import scala.Tuple2; -JavaPairRDD ones = words.map( +JavaPairRDD ones = words.mapToPair( new PairFunction() { public Tuple2 call(String s) { return new Tuple2(s, 1); @@ -140,7 +154,7 @@ JavaPairRDD ones = words.map( ); {% endhighlight %} -Note that `map` was passed a `PairFunction` and +Note that `mapToPair` was passed a `PairFunction` and returned a `JavaPairRDD`. To finish the word count program, we will use `reduceByKey` to count the @@ -164,7 +178,7 @@ possible to chain the RDD transformations, so the word count example could also be written as: {% highlight java %} -JavaPairRDD counts = lines.flatMap( +JavaPairRDD counts = lines.flatMapToPair( ... ).map( ... diff --git a/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java b/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java index fc47b69dd10fb..d70fe8cb12118 100644 --- a/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java +++ b/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java @@ -17,26 +17,27 @@ package org.apache.spark; +import java.io.File; +import java.io.Serializable; +import java.util.*; + +import scala.Tuple2; + import com.google.common.base.Optional; import com.google.common.io.Files; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.spark.api.java.JavaDoubleRDD; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.*; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import scala.Tuple2; - -import java.io.File; -import java.io.Serializable; -import java.util.*; +import org.apache.spark.api.java.JavaDoubleRDD; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.*; /** * Most of these tests replicate org.apache.spark.JavaAPISuite using java 8 @@ -221,12 +222,12 @@ public void mapsFromPairsToPairs() { public void mapPartitions() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2); JavaRDD partitionSums = rdd.mapPartitions(iter -> { - int sum = 0; - while (iter.hasNext()) { - sum += iter.next(); - } - return Collections.singletonList(sum); - }); + int sum = 0; + while (iter.hasNext()) { + sum += iter.next(); + } + return Collections.singletonList(sum); + }); Assert.assertEquals("[3, 7]", partitionSums.collect().toString()); } @@ -242,8 +243,8 @@ public void sequenceFile() { ); JavaPairRDD rdd = sc.parallelizePairs(pairs); - rdd.mapToPair( - pair -> new Tuple2(new IntWritable(pair._1()), new Text(pair._2()))) + rdd.mapToPair(pair -> + new Tuple2(new IntWritable(pair._1()), new Text(pair._2()))) .saveAsHadoopFile(outputDir, IntWritable.class, Text.class, SequenceFileOutputFormat.class); // Try reading the output back as an object file diff --git a/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java b/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java index cd3c925e50fbe..43df0dea614bc 100644 --- a/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java +++ b/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java @@ -17,21 +17,23 @@ package org.apache.spark.streaming; +import java.io.Serializable; +import java.util.*; + +import scala.Tuple2; + import com.google.common.base.Optional; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import org.junit.Assert; +import org.junit.Test; + import org.apache.spark.HashPartitioner; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.streaming.api.java.JavaDStream; import org.apache.spark.streaming.api.java.JavaPairDStream; -import org.junit.Assert; -import org.junit.Test; -import scala.Tuple2; - -import java.io.Serializable; -import java.util.*; /** * Most of these tests replicate org.apache.spark.streaming.JavaAPISuite using java 8 @@ -329,7 +331,8 @@ public void testStreamingContextTransform() { return rdd1.union(rdd2).mapToPair(mapToTuple).join(prdd3); }); JavaTestUtils.attachTestOutputStream(transformed2); - List>>> result = JavaTestUtils.runStreams(ssc, 2, 2); + List>>> result = + JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(expected, result); } @@ -475,7 +478,8 @@ public void testPairMap() { // Maps pair -> pair of different type new Tuple2(3, "new york"), new Tuple2(1, "new york"))); - JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream> stream = + JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream reversed = pairStream.mapToPair(x -> x.swap()); JavaTestUtils.attachTestOutputStream(reversed); @@ -500,7 +504,8 @@ public void testPairMapPartitions() { // Maps pair -> pair of different type new Tuple2(3, "new york"), new Tuple2(1, "new york"))); - JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream> stream = + JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream reversed = pairStream.mapPartitionsToPair(in -> { LinkedList> out = new LinkedList>(); @@ -634,7 +639,8 @@ public void testReduceByKeyAndWindow() { Arrays.asList(new Tuple2("california", 10), new Tuple2("new york", 4))); - JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream> stream = + JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream reduceWindowed = @@ -657,7 +663,8 @@ public void testUpdateStateByKey() { Arrays.asList(new Tuple2("california", 14), new Tuple2("new york", 9))); - JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream> stream = + JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream updated = pairStream.updateStateByKey((values, state) -> { @@ -689,11 +696,13 @@ public void testReduceByKeyAndWindowWithInverse() { Arrays.asList(new Tuple2("california", 10), new Tuple2("new york", 4))); - JavaDStream> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaDStream> stream = + JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream reduceWindowed = - pairStream.reduceByKeyAndWindow((x, y) -> x + y, (x, y) -> x - y, new Duration(2000), new Duration(1000)); + pairStream.reduceByKeyAndWindow((x, y) -> x + y, (x, y) -> x - y, new Duration(2000), + new Duration(1000)); JavaTestUtils.attachTestOutputStream(reduceWindowed); List>> result = JavaTestUtils.runStreams(ssc, 3, 3); diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 092bd0c62eae7..4601e398ba722 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -91,11 +91,11 @@ object SparkBuild extends Build { lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client" val maybeAvro = if (hadoopVersion.startsWith("0.23.") && isYarnEnabled) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq() + // Conditionally include the java 8 sub-project lazy val javaVersion = System.getProperty("java.specification.version") - lazy val isJava8Enabled = if (javaVersion == "1.8") true else false - lazy val java8Tests = Project("java8-tests", file("java8-tests"), settings = java8TestsSettings) dependsOn(core % "compile->compile;test->test") dependsOn(streaming % "compile->compile;test->test") - // Conditionally include the yarn sub-project + lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]() + lazy val java8Tests = Project("java8-tests", file("java8-tests"), settings = java8TestsSettings) dependsOn(core % "compile->compile;test->test") dependsOn(streaming % "compile->compile;test->test") // Conditionally include the yarn sub-project lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core) @@ -128,7 +128,8 @@ object SparkBuild extends Build { // Everything except assembly, tools, java8Tests and examples belong to packageProjects lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef - lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests + lazy val allProjects = packageProjects ++ allExternalRefs ++ + Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.apache.spark", diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java index d04be0617bbe7..a6f8bcc4b8071 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java @@ -742,15 +742,13 @@ public void testPairFlatMap() { new Tuple2(9, "s"))); JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); - JavaPairDStream flatMapped = stream.flatMapToPair( + JavaPairDStream flatMapped = stream.flatMapToPair( new PairFlatMapFunction() { - @Override - public Iterable> call(String in) throws Exception { - List> out = Lists.newArrayList(); - for (String letter: in.split("(?!^)")) { - out.add(new Tuple2(in.length(), letter)); - } - return out; + @Override + public Iterable> call(String in) throws Exception { + List> out = Lists.newArrayList(); + for (String letter: in.split("(?!^)")) { + out.add(new Tuple2(in.length(), letter)); } }); JavaTestUtils.attachTestOutputStream(flatMapped); @@ -1228,7 +1226,8 @@ public void testReduceByKeyAndWindowWithInverse() { JavaPairDStream pairStream = JavaPairDStream.fromJavaDStream(stream); JavaPairDStream reduceWindowed = - pairStream.reduceByKeyAndWindow(new IntegerSum(), new IntegerDifference(), new Duration(2000), new Duration(1000)); + pairStream.reduceByKeyAndWindow(new IntegerSum(), new IntegerDifference(), + new Duration(2000), new Duration(1000)); JavaTestUtils.attachTestOutputStream(reduceWindowed); List>> result = JavaTestUtils.runStreams(ssc, 3, 3); From 31d4cd63c8f2965a4f864459e5dcf3ab029ec2eb Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 25 Feb 2014 16:31:53 +0530 Subject: [PATCH 3/9] Maven build to support -Pjava8-tests flag. --- java8-tests/pom.xml | 188 +++++++++++++++++++++++++++++++++++++++ pom.xml | 25 ++++++ project/SparkBuild.scala | 2 +- 3 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 java8-tests/pom.xml diff --git a/java8-tests/pom.xml b/java8-tests/pom.xml new file mode 100644 index 0000000000000..31c699e7a93c0 --- /dev/null +++ b/java8-tests/pom.xml @@ -0,0 +1,188 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent + 1.0.0-incubating-SNAPSHOT + ../pom.xml + + + org.apache.spark + java8-tests_2.10 + pom + Spark Project Java8 Tests POM + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${project.version} + test-jar + + + com.novocode + junit-interface + test + + + org.scalatest + scalatest_${scala.binary.version} + test + + + + + + java8-tests + + + + org.apache.maven.plugins + maven-surefire-plugin + + + test + + test + + + + + false + + **/Suite*.java + **/*Suite.java + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + test-compile-first + process-test-resources + + testCompile + + + + + true + true + true + 1.8 + 1.8 + 1.8 + UTF-8 + 1024m + + + + + net.alchim31.maven + scala-maven-plugin + + + none + + + scala-compile-first + none + + + scala-test-compile-first + none + + + attach-scaladocs + none + + + + + org.scalatest + scalatest-maven-plugin + + + test + none + + + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + none + + + + + net.alchim31.maven + scala-maven-plugin + + + none + + + scala-compile-first + none + + + scala-test-compile-first + none + + + attach-scaladocs + none + + + + + org.scalatest + scalatest-maven-plugin + + + test + none + + + + + + diff --git a/pom.xml b/pom.xml index 7e28d7c1941bb..12ec46d517aad 100644 --- a/pom.xml +++ b/pom.xml @@ -710,6 +710,31 @@ yarn + + + java8-tests + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + test-jar + + + + + + + + + java8-tests + + diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4601e398ba722..4b1fa0227d909 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -95,7 +95,7 @@ object SparkBuild extends Build { lazy val javaVersion = System.getProperty("java.specification.version") lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]() - lazy val java8Tests = Project("java8-tests", file("java8-tests"), settings = java8TestsSettings) dependsOn(core % "compile->compile;test->test") dependsOn(streaming % "compile->compile;test->test") + lazy val java8Tests = Project("java8-tests", file("java8-tests"), settings = java8TestsSettings) dependsOn(core) dependsOn(streaming % "compile->compile;test->test") // Conditionally include the yarn sub-project lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core) From 35d8d79e4f1ccb6491b81fd670043e2b6c60a815 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 26 Feb 2014 15:34:01 +0530 Subject: [PATCH 4/9] Specified java 8 building in the docs --- docs/building-with-maven.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md index ded12926885b9..d785414c00343 100644 --- a/docs/building-with-maven.md +++ b/docs/building-with-maven.md @@ -25,6 +25,8 @@ If you don't run this, you may see errors like the following: You can fix this by setting the `MAVEN_OPTS` variable as discussed before. +*Note: For Java 1.8 and above this step is not required.* + ## Specifying the Hadoop version ## Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. @@ -76,3 +78,11 @@ The maven build includes support for building a Debian package containing the as $ mvn -Pdeb -DskipTests clean package The debian package can then be found under assembly/target. We added the short commit hash to the file name so that we can distinguish individual packages built for SNAPSHOT versions. + +## Running java 8 test suites. + +Running only java 8 tests and nothing else. + + $ mvn test -DskipTests -Pjava8-tests + +Java 8 tests are run when -Pjava8-tests profile is enabled, they will run inspite of -DskipTests. For these tests to run java 8 should be installed on the system running the tests. From 26eb3f60ae421c07522952c1334ad9a16e3bd822 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 3 Mar 2014 13:54:24 +0530 Subject: [PATCH 5/9] Patrick's comments on PR. Added a "Upgrading from pre-1.0 versions of Spark" section in the Java programming guide. Added brief README file in the java8-tests directory that explains what it is? Fixed "When running the tests in Maven, all of the output is sent console, and not the test summaries as they were running." Fixed " hard to get SBT to use the correct Java version without setting Java 8 as my system default." added a warning to dev/run-tests script if the Java version is less than 1.8 Moved java8-tests folder into a new folder called /extras --- dev/run-tests | 10 ++ docs/building-with-maven.md | 6 +- docs/java-programming-guide.md | 19 ++- extras/java8-tests/README.md | 13 ++ {java8-tests => extras/java8-tests}/pom.xml | 115 ++++++------------ .../java/org/apache/spark/Java8APISuite.java | 0 .../apache/spark/streaming/Java8APISuite.java | 0 pom.xml | 2 +- project/SparkBuild.scala | 6 +- sbt/sbt-launch-lib.bash | 8 +- .../apache/spark/streaming/JavaAPISuite.java | 12 +- 11 files changed, 100 insertions(+), 91 deletions(-) create mode 100644 extras/java8-tests/README.md rename {java8-tests => extras/java8-tests}/pom.xml (58%) rename {java8-tests => extras/java8-tests}/src/test/java/org/apache/spark/Java8APISuite.java (100%) rename {java8-tests => extras/java8-tests}/src/test/java/org/apache/spark/streaming/Java8APISuite.java (100%) diff --git a/dev/run-tests b/dev/run-tests index d65a397b4c8c7..cf0b940c09a81 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -27,6 +27,16 @@ rm -rf ./work # Fail fast set -e +if test -x "$JAVA_HOME/bin/java"; then + declare java_cmd="$JAVA_HOME/bin/java" +else + declare java_cmd=java +fi + +JAVA_VERSION=$($java_cmd -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q') +[ "$JAVA_VERSION" -ge 18 ] && echo "" || echo "[Warn] Java 8 tests will not run, because JDK version is < 1.8." + + echo "=========================================================================" echo "Running Scala style checks" echo "=========================================================================" diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md index d785414c00343..a982c4dbac7d4 100644 --- a/docs/building-with-maven.md +++ b/docs/building-with-maven.md @@ -83,6 +83,8 @@ The debian package can then be found under assembly/target. We added the short c Running only java 8 tests and nothing else. - $ mvn test -DskipTests -Pjava8-tests + $ mvn install -DskipTests -Pjava8-tests -Java 8 tests are run when -Pjava8-tests profile is enabled, they will run inspite of -DskipTests. For these tests to run java 8 should be installed on the system running the tests. +Java 8 tests are run when -Pjava8-tests profile is enabled, they will run in spite of -DskipTests. +For these tests to run your system must have a JDK 8 installation. +If you have JDK 8 installed but it is not the system default, you can set JAVA_HOME to point to JDK 8 before running the tests. diff --git a/docs/java-programming-guide.md b/docs/java-programming-guide.md index 469abe45e33e5..a4ce2381ad918 100644 --- a/docs/java-programming-guide.md +++ b/docs/java-programming-guide.md @@ -17,6 +17,21 @@ which support the same methods as their Scala counterparts but take Java functio Java data and collection types. The main differences have to do with passing functions to RDD operations (e.g. map) and handling RDDs of different types, as discussed next. +# Upgrading from pre-1.0 versions of Spark + +There are following API changes for codebases written in pre-1.0 versions of Spark. + +* All `org.apache.spark.api.java.function.*` abstract classes are now interfaces. + So this means that concrete implementations of these `Function` abstract classes will + have `implements` instead of extends. +* APIs of map and flatMap in core and map, flatMap and transform in streaming + are changed and are defined on the basis of the passed anonymous function's + return type, for example mapToPair(...) or flatMapToPair returns + [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD), + similarly mapToDouble and flatMapToDouble returns + [`JavaDoubleRDD`](api/core/index.html#org.apache.spark.api.java.JavaDoubleRDD). + Please check the API documentation for more details. + # Key Differences in the Java API There are a few key differences between the Java and Scala APIs: @@ -30,7 +45,7 @@ There are a few key differences between the Java and Scala APIs: classes for key-value pairs and doubles. For example, [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD) stores key-value pairs. -* To support upcoming java 8 lambda expression, methods are defined on the basis of +* To support java 8 lambda expression, methods are defined on the basis of the passed anonymous function's (a.k.a lambda expression) return type, for example mapToPair(...) or flatMapToPair returns [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD), @@ -139,7 +154,7 @@ lambda expression as follows: JavaRDD words = lines.flatMap(s -> Arrays.asList(s.split(" "))); {% endhighlight %} -Same possibility applies to all passed in anonymous classes in java 8. +This lambda syntax can be applied to all anonymous classes in Java 8. Continuing with the word count example, we map each word to a `(word, 1)` pair: diff --git a/extras/java8-tests/README.md b/extras/java8-tests/README.md new file mode 100644 index 0000000000000..20a067f2db919 --- /dev/null +++ b/extras/java8-tests/README.md @@ -0,0 +1,13 @@ +# Java 8 test suites. + +These tests are bundled with spark and run if you have java 8 installed as system default or your `JAVA_HOME` points to a java 8(or higher) installation. `JAVA_HOME` is preferred to system default jdk installation. Since these tests require jdk 8 or higher, they defined to be optional to run in the build system. + +For sbt users, it automatically detects the presence of java 8 based on either `JAVA_HOME` environment variable or default installed jdk and run these tests. + +For maven users, + +This automatic detection is not possible and thus user has to ensure that either `JAVA_HOME` environment variable or default installed jdk points to jdk 8. + +`$ mvn install -Pjava8-tests` + +Above command can only be run from project root directory since this module depends on both core and test-jars of core and streaming. These jars are installed first time the above command is run as java8-tests profile enables local publishing of test-jar artifacts as well. Once these artifacts are published then these tests can be run from this module's directory as well. diff --git a/java8-tests/pom.xml b/extras/java8-tests/pom.xml similarity index 58% rename from java8-tests/pom.xml rename to extras/java8-tests/pom.xml index 31c699e7a93c0..1aea280b7ed84 100644 --- a/java8-tests/pom.xml +++ b/extras/java8-tests/pom.xml @@ -61,98 +61,55 @@ java8-tests - - - - org.apache.maven.plugins - maven-surefire-plugin - - - test - - test - - - - - false - - **/Suite*.java - **/*Suite.java - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - test-compile-first - process-test-resources - - testCompile - - - - - true - true - true - 1.8 - 1.8 - 1.8 - UTF-8 - 1024m - - - - - net.alchim31.maven - scala-maven-plugin - - - none - - - scala-compile-first - none - - - scala-test-compile-first - none - - - attach-scaladocs - none - - - - - org.scalatest - scalatest-maven-plugin - - - test - none - - - - - + + org.apache.maven.plugins + maven-surefire-plugin + + + test + + test + + + + + false + + **/Suite*.java + **/*Suite.java + + true + + org.apache.maven.plugins maven-compiler-plugin - none + test-compile-first + process-test-resources + + testCompile + + + true + true + true + 1.8 + 1.8 + 1.8 + UTF-8 + 1024m + + net.alchim31.maven scala-maven-plugin diff --git a/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java b/extras/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java similarity index 100% rename from java8-tests/src/test/java/org/apache/spark/Java8APISuite.java rename to extras/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java diff --git a/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java b/extras/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java similarity index 100% rename from java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java rename to extras/java8-tests/src/test/java/org/apache/spark/streaming/Java8APISuite.java diff --git a/pom.xml b/pom.xml index 12ec46d517aad..c59fada5cd4a0 100644 --- a/pom.xml +++ b/pom.xml @@ -732,7 +732,7 @@ - java8-tests + extras/java8-tests diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4b1fa0227d909..19a4d2e26b1b1 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -95,7 +95,10 @@ object SparkBuild extends Build { lazy val javaVersion = System.getProperty("java.specification.version") lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]() - lazy val java8Tests = Project("java8-tests", file("java8-tests"), settings = java8TestsSettings) dependsOn(core) dependsOn(streaming % "compile->compile;test->test") + lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings). + dependsOn(core) dependsOn(streaming % "compile->compile;test->test") + + val javaHomeEnv = Properties.envOrNone("JAVA_HOME").map(file) // Conditionally include the yarn sub-project lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core) @@ -140,6 +143,7 @@ object SparkBuild extends Build { javacOptions := Seq("-target", JAVAC_JVM_VERSION, "-source", JAVAC_JVM_VERSION), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, + if (javaHomeEnv.isDefined) javaHome := javaHomeEnv else javaHome <<= javaHome in Compile, // This is to add convenience of enabling sbt -Dsbt.offline=true for making the build offline. offline := "true".equalsIgnoreCase(sys.props("sbt.offline")), retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", diff --git a/sbt/sbt-launch-lib.bash b/sbt/sbt-launch-lib.bash index d65bbdc19ce03..f8ad5dfd6711c 100755 --- a/sbt/sbt-launch-lib.bash +++ b/sbt/sbt-launch-lib.bash @@ -16,7 +16,13 @@ declare -a residual_args declare -a java_args declare -a scalac_args declare -a sbt_commands -declare java_cmd=java + +if test -x "$JAVA_HOME/bin/java"; then + echo -e "using $JAVA_HOME for launching sbt." + declare java_cmd="$JAVA_HOME/bin/java" +else + declare java_cmd=java +fi echoerr () { echo 1>&2 "$@" diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java index a6f8bcc4b8071..e93bf18b6d0b9 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java @@ -744,11 +744,13 @@ public void testPairFlatMap() { JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaPairDStream flatMapped = stream.flatMapToPair( new PairFlatMapFunction() { - @Override - public Iterable> call(String in) throws Exception { - List> out = Lists.newArrayList(); - for (String letter: in.split("(?!^)")) { - out.add(new Tuple2(in.length(), letter)); + @Override + public Iterable> call(String in) throws Exception { + List> out = Lists.newArrayList(); + for (String letter: in.split("(?!^)")) { + out.add(new Tuple2(in.length(), letter)); + } + return out; } }); JavaTestUtils.attachTestOutputStream(flatMapped); From 80a13e8b9a2d49a1de5dee263102ac180a9b7077 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 3 Mar 2014 15:15:45 +0530 Subject: [PATCH 6/9] Used fake class tag syntax --- .../streaming/api/java/JavaDStreamLike.scala | 64 ++++++--------- .../streaming/api/java/JavaPairDStream.scala | 80 +++++++------------ 2 files changed, 56 insertions(+), 88 deletions(-) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala index ccdd1ac6794f8..a85cd04c9319c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala @@ -139,7 +139,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T /** Return a new DStream by applying a function to all elements of this DStream. */ def mapToPair[K2, V2](f: PairFunction[T, K2, V2]): JavaPairDStream[K2, V2] = { - def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] + def cm: ClassTag[(K2, V2)] = fakeClassTag new JavaPairDStream(dstream.map[(K2, V2)](f)(cm))(fakeClassTag[K2], fakeClassTag[V2]) } @@ -160,7 +160,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T def flatMapToPair[K2, V2](f: PairFlatMapFunction[T, K2, V2]): JavaPairDStream[K2, V2] = { import scala.collection.JavaConverters._ def fn = (x: T) => f.call(x).asScala - def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] + def cm: ClassTag[(K2, V2)] = fakeClassTag new JavaPairDStream(dstream.flatMap(fn)(cm))(fakeClassTag[K2], fakeClassTag[V2]) } @@ -284,8 +284,8 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * on each RDD of 'this' DStream. */ def transform[U](transformFunc: JFunction[R, JavaRDD[U]]): JavaDStream[U] = { - implicit val cm: ClassTag[U] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[U]] + implicit val cm: ClassTag[U] = fakeClassTag + def scalaTransform (in: RDD[T]): RDD[U] = transformFunc.call(wrapRDD(in)).rdd dstream.transform(scalaTransform(_)) @@ -296,8 +296,8 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * on each RDD of 'this' DStream. */ def transform[U](transformFunc: JFunction2[R, Time, JavaRDD[U]]): JavaDStream[U] = { - implicit val cm: ClassTag[U] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[U]] + implicit val cm: ClassTag[U] = fakeClassTag + def scalaTransform (in: RDD[T], time: Time): RDD[U] = transformFunc.call(wrapRDD(in), time).rdd dstream.transform(scalaTransform(_, _)) @@ -309,10 +309,9 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T */ def transformToPair[K2, V2](transformFunc: JFunction[R, JavaPairRDD[K2, V2]]): JavaPairDStream[K2, V2] = { - implicit val cmk: ClassTag[K2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K2]] - implicit val cmv: ClassTag[V2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V2]] + implicit val cmk: ClassTag[K2] = fakeClassTag + implicit val cmv: ClassTag[V2] = fakeClassTag + def scalaTransform (in: RDD[T]): RDD[(K2, V2)] = transformFunc.call(wrapRDD(in)).rdd dstream.transform(scalaTransform(_)) @@ -324,10 +323,9 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T */ def transformToPair[K2, V2](transformFunc: JFunction2[R, Time, JavaPairRDD[K2, V2]]): JavaPairDStream[K2, V2] = { - implicit val cmk: ClassTag[K2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K2]] - implicit val cmv: ClassTag[V2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V2]] + implicit val cmk: ClassTag[K2] = fakeClassTag + implicit val cmv: ClassTag[V2] = fakeClassTag + def scalaTransform (in: RDD[T], time: Time): RDD[(K2, V2)] = transformFunc.call(wrapRDD(in), time).rdd dstream.transform(scalaTransform(_, _)) @@ -341,10 +339,9 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T other: JavaDStream[U], transformFunc: JFunction3[R, JavaRDD[U], Time, JavaRDD[W]] ): JavaDStream[W] = { - implicit val cmu: ClassTag[U] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[U]] - implicit val cmv: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cmu: ClassTag[U] = fakeClassTag + implicit val cmv: ClassTag[W] = fakeClassTag + def scalaTransform (inThis: RDD[T], inThat: RDD[U], time: Time): RDD[W] = transformFunc.call(wrapRDD(inThis), other.wrapRDD(inThat), time).rdd dstream.transformWith[U, W](other.dstream, scalaTransform(_, _, _)) @@ -358,12 +355,9 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T other: JavaDStream[U], transformFunc: JFunction3[R, JavaRDD[U], Time, JavaPairRDD[K2, V2]] ): JavaPairDStream[K2, V2] = { - implicit val cmu: ClassTag[U] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[U]] - implicit val cmk2: ClassTag[K2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K2]] - implicit val cmv2: ClassTag[V2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V2]] + implicit val cmu: ClassTag[U] = fakeClassTag + implicit val cmk2: ClassTag[K2] = fakeClassTag + implicit val cmv2: ClassTag[V2] = fakeClassTag def scalaTransform (inThis: RDD[T], inThat: RDD[U], time: Time): RDD[(K2, V2)] = transformFunc.call(wrapRDD(inThis), other.wrapRDD(inThat), time).rdd dstream.transformWith[U, (K2, V2)](other.dstream, scalaTransform(_, _, _)) @@ -377,12 +371,10 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T other: JavaPairDStream[K2, V2], transformFunc: JFunction3[R, JavaPairRDD[K2, V2], Time, JavaRDD[W]] ): JavaDStream[W] = { - implicit val cmk2: ClassTag[K2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K2]] - implicit val cmv2: ClassTag[V2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V2]] - implicit val cmw: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cmk2: ClassTag[K2] = fakeClassTag + implicit val cmv2: ClassTag[V2] = fakeClassTag + implicit val cmw: ClassTag[W] = fakeClassTag + def scalaTransform (inThis: RDD[T], inThat: RDD[(K2, V2)], time: Time): RDD[W] = transformFunc.call(wrapRDD(inThis), other.wrapRDD(inThat), time).rdd dstream.transformWith[(K2, V2), W](other.dstream, scalaTransform(_, _, _)) @@ -396,14 +388,10 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T other: JavaPairDStream[K2, V2], transformFunc: JFunction3[R, JavaPairRDD[K2, V2], Time, JavaPairRDD[K3, V3]] ): JavaPairDStream[K3, V3] = { - implicit val cmk2: ClassTag[K2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K2]] - implicit val cmv2: ClassTag[V2] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V2]] - implicit val cmk3: ClassTag[K3] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K3]] - implicit val cmv3: ClassTag[V3] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V3]] + implicit val cmk2: ClassTag[K2] = fakeClassTag + implicit val cmv2: ClassTag[V2] = fakeClassTag + implicit val cmk3: ClassTag[K3] = fakeClassTag + implicit val cmv3: ClassTag[V3] = fakeClassTag def scalaTransform (inThis: RDD[T], inThat: RDD[(K2, V2)], time: Time): RDD[(K3, V3)] = transformFunc.call(wrapRDD(inThis), other.wrapRDD(inThat), time).rdd dstream.transformWith[(K2, V2), (K3, V3)](other.dstream, scalaTransform(_, _, _)) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index 14c846066906d..ac451d1913aaa 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -17,25 +17,25 @@ package org.apache.spark.streaming.api.java -import java.util.{List => JList} import java.lang.{Long => JLong} +import java.util.{List => JList} import scala.collection.JavaConversions._ import scala.reflect.ClassTag -import org.apache.spark.streaming._ -import org.apache.spark.streaming.StreamingContext._ -import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2} -import org.apache.spark.Partitioner +import com.google.common.base.Optional +import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.{JobConf, OutputFormat} import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} -import org.apache.hadoop.conf.Configuration -import org.apache.spark.api.java.{JavaUtils, JavaPairRDD} +import org.apache.spark.Partitioner +import org.apache.spark.api.java.{JavaPairRDD, JavaUtils} import org.apache.spark.api.java.JavaPairRDD._ -import org.apache.spark.storage.StorageLevel -import com.google.common.base.Optional +import org.apache.spark.api.java.JavaSparkContext.fakeClassTag +import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2} import org.apache.spark.rdd.RDD -import org.apache.spark.rdd.PairRDDFunctions +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming._ +import org.apache.spark.streaming.StreamingContext._ import org.apache.spark.streaming.dstream.DStream /** @@ -169,8 +169,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( mergeCombiners: JFunction2[C, C, C], partitioner: Partitioner ): JavaPairDStream[K, C] = { - implicit val cm: ClassTag[C] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[C]] + implicit val cm: ClassTag[C] = fakeClassTag dstream.combineByKey(createCombiner, mergeValue, mergeCombiners, partitioner) } @@ -185,8 +184,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( partitioner: Partitioner, mapSideCombine: Boolean ): JavaPairDStream[K, C] = { - implicit val cm: ClassTag[C] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[C]] + implicit val cm: ClassTag[C] = fakeClassTag dstream.combineByKey(createCombiner, mergeValue, mergeCombiners, partitioner, mapSideCombine) } @@ -454,8 +452,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( */ def updateStateByKey[S](updateFunc: JFunction2[JList[V], Optional[S], Optional[S]]) : JavaPairDStream[K, S] = { - implicit val cm: ClassTag[S] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[S]] + implicit val cm: ClassTag[S] = fakeClassTag dstream.updateStateByKey(convertUpdateStateFunction(updateFunc)) } @@ -472,8 +469,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( updateFunc: JFunction2[JList[V], Optional[S], Optional[S]], numPartitions: Int) : JavaPairDStream[K, S] = { - implicit val cm: ClassTag[S] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[S]] + implicit val cm: ClassTag[S] = fakeClassTag dstream.updateStateByKey(convertUpdateStateFunction(updateFunc), numPartitions) } @@ -491,8 +487,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( updateFunc: JFunction2[JList[V], Optional[S], Optional[S]], partitioner: Partitioner ): JavaPairDStream[K, S] = { - implicit val cm: ClassTag[S] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[S]] + implicit val cm: ClassTag[S] = fakeClassTag dstream.updateStateByKey(convertUpdateStateFunction(updateFunc), partitioner) } @@ -502,8 +497,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * 'this' DStream without changing the key. */ def mapValues[U](f: JFunction[V, U]): JavaPairDStream[K, U] = { - implicit val cm: ClassTag[U] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[U]] + implicit val cm: ClassTag[U] = fakeClassTag dstream.mapValues(f) } @@ -525,8 +519,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * of partitions. */ def cogroup[W](other: JavaPairDStream[K, W]): JavaPairDStream[K, (JList[V], JList[W])] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag dstream.cogroup(other.dstream).mapValues(t => (seqAsJavaList(t._1), seqAsJavaList((t._2)))) } @@ -538,8 +531,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( other: JavaPairDStream[K, W], numPartitions: Int ): JavaPairDStream[K, (JList[V], JList[W])] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag dstream.cogroup(other.dstream, numPartitions) .mapValues(t => (seqAsJavaList(t._1), seqAsJavaList((t._2)))) } @@ -552,8 +544,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( other: JavaPairDStream[K, W], partitioner: Partitioner ): JavaPairDStream[K, (JList[V], JList[W])] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag dstream.cogroup(other.dstream, partitioner) .mapValues(t => (seqAsJavaList(t._1), seqAsJavaList((t._2)))) } @@ -563,8 +554,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * Hash partitioning is used to generate the RDDs with Spark's default number of partitions. */ def join[W](other: JavaPairDStream[K, W]): JavaPairDStream[K, (V, W)] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag dstream.join(other.dstream) } @@ -573,8 +563,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. */ def join[W](other: JavaPairDStream[K, W], numPartitions: Int): JavaPairDStream[K, (V, W)] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag dstream.join(other.dstream, numPartitions) } @@ -586,8 +575,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( other: JavaPairDStream[K, W], partitioner: Partitioner ): JavaPairDStream[K, (V, W)] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag dstream.join(other.dstream, partitioner) } @@ -597,8 +585,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * number of partitions. */ def leftOuterJoin[W](other: JavaPairDStream[K, W]): JavaPairDStream[K, (V, Optional[W])] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag val joinResult = dstream.leftOuterJoin(other.dstream) joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))} } @@ -612,8 +599,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( other: JavaPairDStream[K, W], numPartitions: Int ): JavaPairDStream[K, (V, Optional[W])] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag val joinResult = dstream.leftOuterJoin(other.dstream, numPartitions) joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))} } @@ -626,8 +612,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( other: JavaPairDStream[K, W], partitioner: Partitioner ): JavaPairDStream[K, (V, Optional[W])] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag val joinResult = dstream.leftOuterJoin(other.dstream, partitioner) joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))} } @@ -653,8 +638,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( other: JavaPairDStream[K, W], numPartitions: Int ): JavaPairDStream[K, (Optional[V], W)] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag val joinResult = dstream.rightOuterJoin(other.dstream, numPartitions) joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)} } @@ -668,8 +652,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( other: JavaPairDStream[K, W], partitioner: Partitioner ): JavaPairDStream[K, (Optional[V], W)] = { - implicit val cm: ClassTag[W] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[W]] + implicit val cm: ClassTag[W] = fakeClassTag val joinResult = dstream.rightOuterJoin(other.dstream, partitioner) joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)} } @@ -749,8 +732,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( new JavaDStream[(K, V)](dstream) } - override val classTag: ClassTag[(K, V)] = - implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K, V]]] + override val classTag: ClassTag[(K, V)] = fakeClassTag } object JavaPairDStream { @@ -759,10 +741,8 @@ object JavaPairDStream { } def fromJavaDStream[K, V](dstream: JavaDStream[(K, V)]): JavaPairDStream[K, V] = { - implicit val cmk: ClassTag[K] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K]] - implicit val cmv: ClassTag[V] = - implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]] + implicit val cmk: ClassTag[K] = fakeClassTag + implicit val cmv: ClassTag[V] = fakeClassTag new JavaPairDStream[K, V](dstream.dstream) } From 673f7ac9e8855e3be16e2e955d0c01d1b187073a Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 3 Mar 2014 15:54:21 +0530 Subject: [PATCH 7/9] Added support for -java-home as well --- extras/java8-tests/README.md | 12 +++++++----- .../test/java/org/apache/spark/Java8APISuite.java | 13 +++++++++++++ project/SparkBuild.scala | 4 +--- sbt/sbt-launch-lib.bash | 2 +- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/extras/java8-tests/README.md b/extras/java8-tests/README.md index 20a067f2db919..87a7319e04416 100644 --- a/extras/java8-tests/README.md +++ b/extras/java8-tests/README.md @@ -2,12 +2,14 @@ These tests are bundled with spark and run if you have java 8 installed as system default or your `JAVA_HOME` points to a java 8(or higher) installation. `JAVA_HOME` is preferred to system default jdk installation. Since these tests require jdk 8 or higher, they defined to be optional to run in the build system. -For sbt users, it automatically detects the presence of java 8 based on either `JAVA_HOME` environment variable or default installed jdk and run these tests. +* For sbt users, it automatically detects the presence of java 8 based on either `JAVA_HOME` environment variable or default installed jdk and run these tests. It takes highest precednce, if java home is passed as follows. -For maven users, + `$ sbt/sbt -java-home "/path/to/jdk1.8.0"` -This automatic detection is not possible and thus user has to ensure that either `JAVA_HOME` environment variable or default installed jdk points to jdk 8. +* For maven users, -`$ mvn install -Pjava8-tests` + This automatic detection is not possible and thus user has to ensure that either `JAVA_HOME` environment variable or default installed jdk points to jdk 8. -Above command can only be run from project root directory since this module depends on both core and test-jars of core and streaming. These jars are installed first time the above command is run as java8-tests profile enables local publishing of test-jar artifacts as well. Once these artifacts are published then these tests can be run from this module's directory as well. + `$ mvn install -Pjava8-tests` + + Above command can only be run from project root directory since this module depends on both core and test-jars of core and streaming. These jars are installed first time the above command is run as java8-tests profile enables local publishing of test-jar artifacts as well. Once these artifacts are published then these tests can be run from this module's directory as well. diff --git a/extras/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java b/extras/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java index d70fe8cb12118..f67251217ed4a 100644 --- a/extras/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java +++ b/extras/java8-tests/src/test/java/org/apache/spark/Java8APISuite.java @@ -60,6 +60,19 @@ public void tearDown() { System.clearProperty("spark.driver.port"); } + @Test + public void foreachWithAnonymousClass() { + foreachCalls = 0; + JavaRDD rdd = sc.parallelize(Arrays.asList("Hello", "World")); + rdd.foreach(new VoidFunction() { + @Override + public void call(String s) { + foreachCalls++; + } + }); + Assert.assertEquals(2, foreachCalls); + } + @Test public void foreach() { foreachCalls = 0; diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 19a4d2e26b1b1..aa1784897566b 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -98,8 +98,6 @@ object SparkBuild extends Build { lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings). dependsOn(core) dependsOn(streaming % "compile->compile;test->test") - val javaHomeEnv = Properties.envOrNone("JAVA_HOME").map(file) - // Conditionally include the yarn sub-project lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core) lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core) @@ -143,7 +141,7 @@ object SparkBuild extends Build { javacOptions := Seq("-target", JAVAC_JVM_VERSION, "-source", JAVAC_JVM_VERSION), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, - if (javaHomeEnv.isDefined) javaHome := javaHomeEnv else javaHome <<= javaHome in Compile, + javaHome := Properties.envOrNone("JAVA_HOME").map(file), // This is to add convenience of enabling sbt -Dsbt.offline=true for making the build offline. offline := "true".equalsIgnoreCase(sys.props("sbt.offline")), retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", diff --git a/sbt/sbt-launch-lib.bash b/sbt/sbt-launch-lib.bash index f8ad5dfd6711c..93f6cce3fcb58 100755 --- a/sbt/sbt-launch-lib.bash +++ b/sbt/sbt-launch-lib.bash @@ -137,7 +137,7 @@ process_args () { -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;; -sbt-version) require_arg version "$1" "$2" && sbt_version="$2" && shift 2 ;; - -java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && shift 2 ;; + -java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && export JAVA_HOME=$2 && shift 2 ;; -D*) addJava "$1" && shift ;; -J*) addJava "${1:2}" && shift ;; From 85a954eefbb310dfa6566e64e1b1162e1aa6dea6 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 3 Mar 2014 16:07:00 +0530 Subject: [PATCH 8/9] Nit. import orderings. --- .../apache/spark/api/java/function/PairFlatMapFunction.java | 3 +-- .../java/org/apache/spark/api/java/function/PairFunction.java | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java b/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java index c5c65d40bc2fb..691ef2eceb1f6 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java @@ -17,10 +17,9 @@ package org.apache.spark.api.java.function; -import scala.Tuple2; - import java.io.Serializable; +import scala.Tuple2; /** * A function that returns zero or more key-value pair records from each input record. The diff --git a/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java b/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java index d32cf30c624b4..abd9bcc07ac61 100644 --- a/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java +++ b/core/src/main/java/org/apache/spark/api/java/function/PairFunction.java @@ -17,10 +17,10 @@ package org.apache.spark.api.java.function; -import scala.Tuple2; - import java.io.Serializable; +import scala.Tuple2; + /** * A function that returns key-value pairs (Tuple2), and can be used to construct PairRDDs. */ From 95850e6e58b83b59e1f679c7b1cd8aaa7df854dc Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 3 Mar 2014 14:46:14 -0800 Subject: [PATCH 9/9] Some doc improvements and build changes to the Java 8 patch. --- docs/java-programming-guide.md | 57 ++++++++++--------- extras/README.md | 1 + extras/java8-tests/README.md | 25 +++++--- extras/java8-tests/pom.xml | 12 +++- .../src/test/resources/log4j.properties | 28 +++++++++ sbt/sbt-launch-lib.bash | 3 +- 6 files changed, 86 insertions(+), 40 deletions(-) create mode 100644 extras/README.md create mode 100644 extras/java8-tests/src/test/resources/log4j.properties diff --git a/docs/java-programming-guide.md b/docs/java-programming-guide.md index a4ce2381ad918..6632360f6e3ca 100644 --- a/docs/java-programming-guide.md +++ b/docs/java-programming-guide.md @@ -17,36 +17,21 @@ which support the same methods as their Scala counterparts but take Java functio Java data and collection types. The main differences have to do with passing functions to RDD operations (e.g. map) and handling RDDs of different types, as discussed next. -# Upgrading from pre-1.0 versions of Spark - -There are following API changes for codebases written in pre-1.0 versions of Spark. - -* All `org.apache.spark.api.java.function.*` abstract classes are now interfaces. - So this means that concrete implementations of these `Function` abstract classes will - have `implements` instead of extends. -* APIs of map and flatMap in core and map, flatMap and transform in streaming - are changed and are defined on the basis of the passed anonymous function's - return type, for example mapToPair(...) or flatMapToPair returns - [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD), - similarly mapToDouble and flatMapToDouble returns - [`JavaDoubleRDD`](api/core/index.html#org.apache.spark.api.java.JavaDoubleRDD). - Please check the API documentation for more details. - # Key Differences in the Java API There are a few key differences between the Java and Scala APIs: -* Java does not support anonymous or first-class functions, so functions must - be implemented by extending the +* Java does not support anonymous or first-class functions, so functions are passed + using anonymous classes that implement the [`org.apache.spark.api.java.function.Function`](api/core/index.html#org.apache.spark.api.java.function.Function), [`Function2`](api/core/index.html#org.apache.spark.api.java.function.Function2), etc. - classes. + interfaces. * To maintain type safety, the Java API defines specialized Function and RDD classes for key-value pairs and doubles. For example, [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD) stores key-value pairs. -* To support java 8 lambda expression, methods are defined on the basis of - the passed anonymous function's (a.k.a lambda expression) return type, +* Some methods are defined on the basis of the passed anonymous function's + (a.k.a lambda expression) return type, for example mapToPair(...) or flatMapToPair returns [`JavaPairRDD`](api/core/index.html#org.apache.spark.api.java.JavaPairRDD), similarly mapToDouble and flatMapToDouble returns @@ -74,10 +59,10 @@ each specialized RDD class, so filtering a `PairRDD` returns a new `PairRDD`, etc (this acheives the "same-result-type" principle used by the [Scala collections framework](http://docs.scala-lang.org/overviews/core/architecture-of-scala-collections.html)). -## Function Classes +## Function Interfaces -The following table lists the function classes used by the Java API. Each -class has a single abstract method, `call()`, that must be implemented. +The following table lists the function interfaces used by the Java API. Each +interface has a single abstract method, `call()`, that must be implemented. @@ -106,6 +91,21 @@ The Java API supports other Spark features, including [broadcast variables](scala-programming-guide.html#broadcast-variables), and [caching](scala-programming-guide.html#rdd-persistence). +# Upgrading From Pre-1.0 Versions of Spark + +In version 1.0 of Spark the Java API was refactored to better support Java 8 +lambda expressions. Users upgrading from older versions of Spark should note +the following changes: + +* All `org.apache.spark.api.java.function.*` have been changed from abstract + classes to interfaces. This means that concrete implementations of these + `Function` classes will need to use `implements` rather than `extends`. +* Certain transformation functions now have multiple versions depending + on the return type. In Spark core, the map functions (map, flatMap, + mapPartitons) have type-specific versions, e.g. + [`mapToPair`](api/core/index.html#org.apache.spark.api.java.JavaRDD@mapToPair[K2,V2](f:org.apache.spark.api.java.function.PairFunction[T,K2,V2]):org.apache.spark.api.java.JavaPairRDD[K2,V2]) + and [`mapToDouble`](api/core/index.html#org.apache.spark.api.java.JavaRDD@mapToDouble[R](f:org.apache.spark.api.java.function.DoubleFunction[T]):org.apache.spark.api.java.JavaDoubleRDD). + Spark Streaming also uses the same approach, e.g. [`transformToPair`](api/streaming/index.html#org.apache.spark.streaming.api.java.JavaDStream@transformToPair[K2,V2](transformFunc:org.apache.spark.api.java.function.Function[R,org.apache.spark.api.java.JavaPairRDD[K2,V2]]):org.apache.spark.streaming.api.java.JavaPairDStream[K2,V2]). # Example @@ -147,8 +147,8 @@ class Split extends FlatMapFunction { JavaRDD words = lines.flatMap(new Split()); {% endhighlight %} -Java 8+ users can also possibly write the above `FlatMapFunction` in a more concise way using -lambda expression as follows: +Java 8+ users can also write the above `FlatMapFunction` in a more concise way using +a lambda expression: {% highlight java %} JavaRDD words = lines.flatMap(s -> Arrays.asList(s.split(" "))); @@ -209,10 +209,11 @@ just a matter of style. We currently provide documentation for the Java API as Scaladoc, in the [`org.apache.spark.api.java` package](api/core/index.html#org.apache.spark.api.java.package), because -some of the classes are implemented in Scala. The main downside is that the types and function +some of the classes are implemented in Scala. It is important to note that the types and function definitions show Scala syntax (for example, `def reduce(func: Function2[T, T]): T` instead of -`T reduce(Function2 func)`). -We hope to generate documentation with Java-style syntax in the future. +`T reduce(Function2 func)`). In addition, the Scala `trait` modifier is used for Java +interface classes. We hope to generate documentation with Java-style syntax in the future to +avoid these quirks. # Where to Go from Here diff --git a/extras/README.md b/extras/README.md new file mode 100644 index 0000000000000..1b4174b7d5cff --- /dev/null +++ b/extras/README.md @@ -0,0 +1 @@ +This directory contains build components not included by default in Spark's build. diff --git a/extras/java8-tests/README.md b/extras/java8-tests/README.md index 87a7319e04416..e95b73ac7702a 100644 --- a/extras/java8-tests/README.md +++ b/extras/java8-tests/README.md @@ -1,15 +1,24 @@ -# Java 8 test suites. +# Java 8 Test Suites -These tests are bundled with spark and run if you have java 8 installed as system default or your `JAVA_HOME` points to a java 8(or higher) installation. `JAVA_HOME` is preferred to system default jdk installation. Since these tests require jdk 8 or higher, they defined to be optional to run in the build system. +These tests require having Java 8 installed and are isolated from the main Spark build. +If Java 8 is not your system's default Java version, you will need to point Spark's build +to your Java location. The set-up depends a bit on the build system: -* For sbt users, it automatically detects the presence of java 8 based on either `JAVA_HOME` environment variable or default installed jdk and run these tests. It takes highest precednce, if java home is passed as follows. +* Sbt users can either set JAVA_HOME to the location of a Java 8 JDK or explicitly pass + `-java-home` to the sbt launch script. If a Java 8 JDK is detected sbt will automatically + include the Java 8 test project. - `$ sbt/sbt -java-home "/path/to/jdk1.8.0"` + `$ JAVA_HOME=/opt/jdk1.8.0/ sbt/sbt clean "test-only org.apache.spark.Java8APISuite"` -* For maven users, +* For Maven users, - This automatic detection is not possible and thus user has to ensure that either `JAVA_HOME` environment variable or default installed jdk points to jdk 8. + Maven users can also refer to their Java 8 directory using JAVA_HOME. However, Maven will not + automatically detect the presence of a Java 8 JDK, so a special build profile `-Pjava8-tests` + must be used. - `$ mvn install -Pjava8-tests` + `$ JAVA_HOME=/opt/jdk1.8.0/ mvn clean install -DskipTests` + `$ JAVA_HOME=/opt/jdk1.8.0/ mvn test -Pjava8-tests -DwildcardSuites=org.apache.spark.Java8APISuite` - Above command can only be run from project root directory since this module depends on both core and test-jars of core and streaming. These jars are installed first time the above command is run as java8-tests profile enables local publishing of test-jar artifacts as well. Once these artifacts are published then these tests can be run from this module's directory as well. + Note that the above command can only be run from project root directory since this module + depends on core and the test-jars of core and streaming. This means an install step is + required to make the test dependencies visible to the Java 8 sub-project. diff --git a/extras/java8-tests/pom.xml b/extras/java8-tests/pom.xml index 1aea280b7ed84..602f66f9c5cf1 100644 --- a/extras/java8-tests/pom.xml +++ b/extras/java8-tests/pom.xml @@ -20,8 +20,8 @@ org.apache.spark spark-parent - 1.0.0-incubating-SNAPSHOT - ../pom.xml + 1.0.0-SNAPSHOT + ../../pom.xml org.apache.spark @@ -77,12 +77,18 @@ + + + + file:src/test/resources/log4j.properties + + false **/Suite*.java **/*Suite.java - true diff --git a/extras/java8-tests/src/test/resources/log4j.properties b/extras/java8-tests/src/test/resources/log4j.properties new file mode 100644 index 0000000000000..180beaa8cc5a7 --- /dev/null +++ b/extras/java8-tests/src/test/resources/log4j.properties @@ -0,0 +1,28 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the file target/unit-tests.log +log4j.rootCategory=INFO, file +log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file.append=false +log4j.appender.file.file=target/unit-tests.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.eclipse.jetty=WARN +org.eclipse.jetty.LEVEL=WARN diff --git a/sbt/sbt-launch-lib.bash b/sbt/sbt-launch-lib.bash index 93f6cce3fcb58..00a6b41013e5f 100755 --- a/sbt/sbt-launch-lib.bash +++ b/sbt/sbt-launch-lib.bash @@ -18,7 +18,8 @@ declare -a scalac_args declare -a sbt_commands if test -x "$JAVA_HOME/bin/java"; then - echo -e "using $JAVA_HOME for launching sbt." + echo -e "Using $JAVA_HOME as default JAVA_HOME." + echo "Note, this will be overridden by -java-home if it is set." declare java_cmd="$JAVA_HOME/bin/java" else declare java_cmd=java
ClassFunction Type