From 794a10b4d95f1cae2500d373939dfb85eb8c9189 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Mon, 16 Dec 2019 22:29:53 +0800 Subject: [PATCH 1/9] init pr --- .../scala/org/apache/spark/ml/functions.scala | 38 ++++++++++++++++ .../org/apache/spark/ml/FunctionsSuite.scala | 43 +++++++++++++++++++ python/pyspark/ml/functions.py | 36 ++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/functions.scala create mode 100644 mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala create mode 100644 python/pyspark/ml/functions.py diff --git a/mllib/src/main/scala/org/apache/spark/ml/functions.scala b/mllib/src/main/scala/org/apache/spark/ml/functions.scala new file mode 100644 index 0000000000000..50a54bfc573cd --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/functions.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml + +import org.apache.spark.annotation.Since +import org.apache.spark.ml.linalg.Vector +import org.apache.spark.sql.functions.udf +import org.apache.spark.sql.Column + +// scalastyle:off +@Since("3.0.0") +object functions { +// scalastyle:on + + private[ml] val vector_to_dense_array_udf = udf { v: Vector => v.toArray } + + /** + * Convert MLlib sparse/dense vectors in a DataFrame into dense arrays. + * + * @since 3.0.0 + */ + def vector_to_dense_array(v: Column): Column = vector_to_dense_array_udf(v) +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala new file mode 100644 index 0000000000000..93ff3d6759ece --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml + +import org.apache.spark.ml.functions.vector_to_dense_array +import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.util.MLTest + +class FunctionsSuite extends MLTest { + + import testImplicits._ + + test("test vector_to_dense_array") { + val df1 = Seq( + Tuple1(Vectors.dense(1.0, 2.0, 3.0)), + Tuple1(Vectors.sparse(3, Seq((0, 2.0), (2, 3.0)))) + ).toDF("vec") + + val result = df1.select(vector_to_dense_array('vec)) + .as[Array[Double]].collect() + val expected = Array( + Array(1.0, 2.0, 3.0), + Array(2.0, 0.0, 3.0) + ) + + assert(result === expected) + } +} diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py new file mode 100644 index 0000000000000..17a5abbec5c74 --- /dev/null +++ b/python/pyspark/ml/functions.py @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark import since, SparkContext +from pyspark.sql.column import Column, _to_java_column + +@since(3.0) +def vector_to_dense_array(col): + """ + Convert MLlib sparse/dense vectors in a DataFrame into dense arrays. + + >>> from pyspark.ml.linalg import Vectors + >>> from pyspark.ml.functions import vector_to_dense_array + >>> df = spark.createDataFrame([ + ... (Vectors.dense(1.0, 2.0, 3.0),), + ... 
(Vectors.sparse(3, [(0, 2.0), (2, 3.0)]),)], ["vec"]) + >>> df.select(vector_to_dense_array("vec").alias("arr")).collect() + [Row(arr=[1.0, 2.0, 3.0]), Row(arr=[2.0, 0.0, 3.0])] + """ + sc = SparkContext._active_spark_context + return Column( + sc._jvm.org.apache.spark.ml.functions.vector_to_dense_array(_to_java_column(col))) From afc71af15d1edce319529724a18f68c1692180a3 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Tue, 17 Dec 2019 00:29:29 +0800 Subject: [PATCH 2/9] fix scala style --- mllib/src/main/scala/org/apache/spark/ml/functions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/functions.scala b/mllib/src/main/scala/org/apache/spark/ml/functions.scala index 50a54bfc573cd..cb0c06e1a08de 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/functions.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/functions.scala @@ -19,8 +19,8 @@ package org.apache.spark.ml import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.Vector -import org.apache.spark.sql.functions.udf import org.apache.spark.sql.Column +import org.apache.spark.sql.functions.udf // scalastyle:off @Since("3.0.0") From e2bb6c098198f611c3b74289cb53be9a1e187de1 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Tue, 17 Dec 2019 11:25:20 +0800 Subject: [PATCH 3/9] address comments --- .../scala/org/apache/spark/ml/functions.scala | 13 +++++++++-- .../org/apache/spark/ml/FunctionsSuite.scala | 22 ++++++++++--------- python/pyspark/ml/functions.py | 20 +++++++++++------ 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/functions.scala b/mllib/src/main/scala/org/apache/spark/ml/functions.scala index cb0c06e1a08de..2fa9b7f5155fe 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/functions.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/functions.scala @@ -19,6 +19,7 @@ package org.apache.spark.ml import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.Vector +import org.apache.spark.mllib.linalg.{Vector => OldVector} import org.apache.spark.sql.Column import org.apache.spark.sql.functions.udf @@ -27,12 +28,20 @@ import org.apache.spark.sql.functions.udf object functions { // scalastyle:on - private[ml] val vector_to_dense_array_udf = udf { v: Vector => v.toArray } + private[ml] val vector_to_array_udf = udf { vec: Any => + vec match { + case v: Vector => v.toArray + case v: OldVector => v.toArray + case _ => throw new IllegalArgumentException( + "function vector_to_array require an argument of type " + + "`org.apache.spark.ml.linalg.Vector` or `org.apache.spark.mllib.linalg.Vector`.") + } + } /** * Convert MLlib sparse/dense vectors in a DataFrame into dense arrays. 
* * @since 3.0.0 */ - def vector_to_dense_array(v: Column): Column = vector_to_dense_array_udf(v) + def vector_to_array(v: Column): Column = vector_to_array_udf(v) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala index 93ff3d6759ece..2454f83d2e54c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala @@ -17,27 +17,29 @@ package org.apache.spark.ml -import org.apache.spark.ml.functions.vector_to_dense_array +import org.apache.spark.ml.functions.vector_to_array import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.util.MLTest +import org.apache.spark.mllib.linalg.{Vectors => OldVectors} class FunctionsSuite extends MLTest { import testImplicits._ - test("test vector_to_dense_array") { + test("test vector_to_array") { val df1 = Seq( - Tuple1(Vectors.dense(1.0, 2.0, 3.0)), - Tuple1(Vectors.sparse(3, Seq((0, 2.0), (2, 3.0)))) - ).toDF("vec") + (Vectors.dense(1.0, 2.0, 3.0), OldVectors.dense(10.0, 20.0, 30.0)), + (Vectors.sparse(3, Seq((0, 2.0), (2, 3.0))), OldVectors.sparse(3, Seq((0, 20.0), (2, 30.0)))) + ).toDF("vec", "oldVec") + + val result = df1.select(vector_to_array('vec), vector_to_array('oldVec)) + .as[(List[Double], List[Double])] + .collect() - val result = df1.select(vector_to_dense_array('vec)) - .as[Array[Double]].collect() val expected = Array( - Array(1.0, 2.0, 3.0), - Array(2.0, 0.0, 3.0) + (List(1.0, 2.0, 3.0), List(10.0, 20.0, 30.0)), + (List(2.0, 0.0, 3.0), List(20.0, 0.0, 30.0)) ) - assert(result === expected) } } diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index 17a5abbec5c74..f0c99a043eac1 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -18,19 +18,25 @@ from pyspark import since, SparkContext from pyspark.sql.column import Column, _to_java_column + @since(3.0) -def vector_to_dense_array(col): +def vector_to_array(col): """ Convert MLlib sparse/dense vectors in a DataFrame into dense arrays. >>> from pyspark.ml.linalg import Vectors - >>> from pyspark.ml.functions import vector_to_dense_array + >>> from pyspark.ml.functions import vector_to_array + >>> from pyspark.mllib.linalg import Vectors as OldVectors >>> df = spark.createDataFrame([ - ... (Vectors.dense(1.0, 2.0, 3.0),), - ... (Vectors.sparse(3, [(0, 2.0), (2, 3.0)]),)], ["vec"]) - >>> df.select(vector_to_dense_array("vec").alias("arr")).collect() - [Row(arr=[1.0, 2.0, 3.0]), Row(arr=[2.0, 0.0, 3.0])] + ... (Vectors.dense(1.0, 2.0, 3.0), OldVectors.dense(10.0, 20.0, 30.0)), + ... (Vectors.sparse(3, [(0, 2.0), (2, 3.0)]), + ... OldVectors.sparse(3, [(0, 20.0), (2, 30.0)]))], + ... ["vec", "oldVec"]) + >>> df.select(vector_to_array("vec").alias("vec"), + ... 
vector_to_array("oldVec").alias("oldVec")).collect() + [Row(vec=[1.0, 2.0, 3.0], oldVec=[10.0, 20.0, 30.0]), + Row(vec=[2.0, 0.0, 3.0], oldVec=[20.0, 0.0, 30.0])] """ sc = SparkContext._active_spark_context return Column( - sc._jvm.org.apache.spark.ml.functions.vector_to_dense_array(_to_java_column(col))) + sc._jvm.org.apache.spark.ml.functions.vector_to_array(_to_java_column(col))) From 5aacfbc3fed9e2036bca1d0291277ea9d6097577 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Wed, 18 Dec 2019 16:15:22 +0800 Subject: [PATCH 4/9] address comments --- dev/sparktestsupport/modules.py | 1 + .../main/scala/org/apache/spark/ml/functions.scala | 10 +++++----- .../scala/org/apache/spark/ml/FunctionsSuite.scala | 14 +++++++------- python/docs/pyspark.ml.rst | 8 ++++++++ python/pyspark/ml/functions.py | 2 +- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 1443584ccbcb8..41793593e9954 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -460,6 +460,7 @@ def __hash__(self): "pyspark.ml.evaluation", "pyspark.ml.feature", "pyspark.ml.fpm", + "pyspark.ml.functions", "pyspark.ml.image", "pyspark.ml.linalg.__init__", "pyspark.ml.recommendation", diff --git a/mllib/src/main/scala/org/apache/spark/ml/functions.scala b/mllib/src/main/scala/org/apache/spark/ml/functions.scala index 2fa9b7f5155fe..896c75aee1b02 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/functions.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/functions.scala @@ -28,20 +28,20 @@ import org.apache.spark.sql.functions.udf object functions { // scalastyle:on - private[ml] val vector_to_array_udf = udf { vec: Any => + private val vectorToArrayUdf = udf { vec: Any => vec match { case v: Vector => v.toArray case v: OldVector => v.toArray case _ => throw new IllegalArgumentException( - "function vector_to_array require an argument of type " + + "function vector_to_array requires a non-null input argument and input type must be " + "`org.apache.spark.ml.linalg.Vector` or `org.apache.spark.mllib.linalg.Vector`.") } - } + }.asNonNullable() /** - * Convert MLlib sparse/dense vectors in a DataFrame into dense arrays. + * Converts a column of MLlib sparse/dense vectors into a column of dense arrays. 
* * @since 3.0.0 */ - def vector_to_array(v: Column): Column = vector_to_array_udf(v) + def vector_to_array(v: Column): Column = vectorToArrayUdf(v) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala index 2454f83d2e54c..0bb671f476f24 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala @@ -27,18 +27,18 @@ class FunctionsSuite extends MLTest { import testImplicits._ test("test vector_to_array") { - val df1 = Seq( + val df = Seq( (Vectors.dense(1.0, 2.0, 3.0), OldVectors.dense(10.0, 20.0, 30.0)), (Vectors.sparse(3, Seq((0, 2.0), (2, 3.0))), OldVectors.sparse(3, Seq((0, 20.0), (2, 30.0)))) ).toDF("vec", "oldVec") - val result = df1.select(vector_to_array('vec), vector_to_array('oldVec)) - .as[(List[Double], List[Double])] - .collect() + val result = df.select(vector_to_array('vec), vector_to_array('oldVec)) + .as[(Seq[Double], Seq[Double])] + .collect().toSeq - val expected = Array( - (List(1.0, 2.0, 3.0), List(10.0, 20.0, 30.0)), - (List(2.0, 0.0, 3.0), List(20.0, 0.0, 30.0)) + val expected = Seq( + (Seq(1.0, 2.0, 3.0), Seq(10.0, 20.0, 30.0)), + (Seq(2.0, 0.0, 3.0), Seq(20.0, 0.0, 30.0)) ) assert(result === expected) } diff --git a/python/docs/pyspark.ml.rst b/python/docs/pyspark.ml.rst index 6a5d81706f071..e31dfddd5988e 100644 --- a/python/docs/pyspark.ml.rst +++ b/python/docs/pyspark.ml.rst @@ -41,6 +41,14 @@ pyspark.ml.clustering module :undoc-members: :inherited-members: +pyspark.ml.functions module +---------------------------- + +.. automodule:: pyspark.ml.functions + :members: + :undoc-members: + :inherited-members: + pyspark.ml.linalg module ---------------------------- diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index f0c99a043eac1..70d5dc0283388 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -22,7 +22,7 @@ @since(3.0) def vector_to_array(col): """ - Convert MLlib sparse/dense vectors in a DataFrame into dense arrays. + Converts a column of MLlib sparse/dense vectors into a column of dense arrays. 
>>> from pyspark.ml.linalg import Vectors >>> from pyspark.ml.functions import vector_to_array From 66e3f5e73d1bcca96bd3a78ea77358118cbfa10a Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 19 Dec 2019 16:48:43 +0800 Subject: [PATCH 5/9] add doctest --- python/pyspark/ml/functions.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index 70d5dc0283388..d81d44a3c3e53 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -40,3 +40,25 @@ def vector_to_array(col): sc = SparkContext._active_spark_context return Column( sc._jvm.org.apache.spark.ml.functions.vector_to_array(_to_java_column(col))) + + +def _test(): + import doctest + import pyspark.ml.functions + globs = pyspark.ml.functions.__dict__.copy() + spark = SparkSession.builder \ + .master("local[2]") \ + .appName("ml.functions tests") \ + .getOrCreate() + sc = spark.sparkContext + globs['sc'] = sc + globs['spark'] = spark + + (failure_count, test_count) = doctest.testmod(pyspark.ml.functions, globs=globs) + spark.stop() + if failure_count: + sys.exit(-1) + + +if __name__ == "__main__": + _test() From a41d01a6276da7b026a926036abcf28b76a58642 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 19 Dec 2019 17:02:52 +0800 Subject: [PATCH 6/9] fix --- python/pyspark/ml/functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index d81d44a3c3e53..f60269cb63645 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -44,6 +44,7 @@ def vector_to_array(col): def _test(): import doctest + from pyspark.sql import SparkSession import pyspark.ml.functions globs = pyspark.ml.functions.__dict__.copy() spark = SparkSession.builder \ From 22865e04da3bf93a47a6b14a40bd75cab59e39cf Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 19 Dec 2019 18:13:26 +0800 Subject: [PATCH 7/9] fix doctest --- python/pyspark/ml/functions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index f60269cb63645..2b4d8ddcd00a8 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -46,6 +46,7 @@ def _test(): import doctest from pyspark.sql import SparkSession import pyspark.ml.functions + import sys globs = pyspark.ml.functions.__dict__.copy() spark = SparkSession.builder \ .master("local[2]") \ @@ -55,7 +56,9 @@ def _test(): globs['sc'] = sc globs['spark'] = spark - (failure_count, test_count) = doctest.testmod(pyspark.ml.functions, globs=globs) + (failure_count, test_count) = doctest.testmod( + pyspark.ml.functions, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE) spark.stop() if failure_count: sys.exit(-1) From 05a525d30b365f60547b342077f1752bd405a370 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Sat, 21 Dec 2019 11:47:07 +0800 Subject: [PATCH 8/9] address comments --- .../scala/org/apache/spark/ml/functions.scala | 5 ++-- .../org/apache/spark/ml/FunctionsSuite.scala | 23 ++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/functions.scala b/mllib/src/main/scala/org/apache/spark/ml/functions.scala index 896c75aee1b02..1faf562c4d896 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/functions.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/functions.scala @@ -32,9 +32,10 @@ object functions { vec match { case v: Vector => v.toArray case v: 
OldVector => v.toArray - case _ => throw new IllegalArgumentException( + case v => throw new IllegalArgumentException( "function vector_to_array requires a non-null input argument and input type must be " + - "`org.apache.spark.ml.linalg.Vector` or `org.apache.spark.mllib.linalg.Vector`.") + "`org.apache.spark.ml.linalg.Vector` or `org.apache.spark.mllib.linalg.Vector`, " + + s"but got ${ if (v == null) "null" else v.getClass.getName }.") } }.asNonNullable() diff --git a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala index 0bb671f476f24..7ce4627911c26 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala @@ -17,10 +17,12 @@ package org.apache.spark.ml +import org.apache.spark.SparkException import org.apache.spark.ml.functions.vector_to_array import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.util.MLTest import org.apache.spark.mllib.linalg.{Vectors => OldVectors} +import org.apache.spark.sql.functions.col class FunctionsSuite extends MLTest { @@ -32,7 +34,7 @@ class FunctionsSuite extends MLTest { (Vectors.sparse(3, Seq((0, 2.0), (2, 3.0))), OldVectors.sparse(3, Seq((0, 20.0), (2, 30.0)))) ).toDF("vec", "oldVec") - val result = df.select(vector_to_array('vec), vector_to_array('oldVec)) + val result = df.select(vector_to_array('vec), vector_to_array('oldVec)) .as[(Seq[Double], Seq[Double])] .collect().toSeq @@ -41,5 +43,24 @@ class FunctionsSuite extends MLTest { (Seq(2.0, 0.0, 3.0), Seq(20.0, 0.0, 30.0)) ) assert(result === expected) + + val df2 = Seq( + (Vectors.dense(1.0, 2.0, 3.0), + OldVectors.dense(10.0, 20.0, 30.0), 1), + (null, null, 0) + ).toDF("vec", "oldVec", "label") + + + for ((colName, valType) <- Seq( + ("vec", "null"), ("oldVec", "null"), ("label", "java.lang.Integer"))) { + val thrown1 = intercept[SparkException] { + df2.select(vector_to_array(col(colName))).count + } + assert(thrown1.getCause.getMessage.contains( + "function vector_to_array requires a non-null input argument and input type must be " + + "`org.apache.spark.ml.linalg.Vector` or `org.apache.spark.mllib.linalg.Vector`, " + + s"but got ${valType}")) + + } } } From d257dce703986e4ed325af0bae07c88a467a684e Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Sat, 21 Dec 2019 11:51:13 +0800 Subject: [PATCH 9/9] update --- .../src/test/scala/org/apache/spark/ml/FunctionsSuite.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala index 7ce4627911c26..2f5062c689fc7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala @@ -58,9 +58,8 @@ class FunctionsSuite extends MLTest { } assert(thrown1.getCause.getMessage.contains( "function vector_to_array requires a non-null input argument and input type must be " + - "`org.apache.spark.ml.linalg.Vector` or `org.apache.spark.mllib.linalg.Vector`, " + - s"but got ${valType}")) - + "`org.apache.spark.ml.linalg.Vector` or `org.apache.spark.mllib.linalg.Vector`, " + + s"but got ${valType}")) } } }
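
For reference, below is a minimal usage sketch of the vector_to_array function this
series introduces. It is not part of the patches themselves; it assumes Spark 3.0
with these patches applied, and the master setting, app name, and column names
(VectorToArrayDemo, "features", "features_arr") are illustrative only.

import org.apache.spark.ml.functions.vector_to_array
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

object VectorToArrayDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("vector_to_array demo")
      .getOrCreate()
    import spark.implicits._

    // One dense and one sparse ML vector; unset sparse slots come back as 0.0.
    val df = Seq(
      Tuple1(Vectors.dense(1.0, 2.0, 3.0)),
      Tuple1(Vectors.sparse(3, Seq((0, 2.0), (2, 3.0))))
    ).toDF("features")

    // vector_to_array turns the vector column into a plain array<double>
    // column, so ordinary SQL array functions (element_at, explode, ...)
    // can be applied to the result.
    df.select(vector_to_array(col("features")).as("features_arr"))
      .show(truncate = false)
    // features_arr: [1.0, 2.0, 3.0] and [2.0, 0.0, 3.0]

    spark.stop()
  }
}

Null inputs and columns of any non-vector type fall through to the catch-all case
of the UDF and raise the IllegalArgumentException whose wording is asserted by the
test added in patch 8.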