Use udf instead of callUDF in KMeansModel.transform

apache · Jul 2, 2015 · 19326f8 · 19326f8
1 parent 4d2ad1e
commit 19326f8
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -24,7 +24,7 @@ import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.mllib.clustering.{KMeans => MLlibKMeans, KMeansModel => MLlibKMeansModel}
 import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
-import org.apache.spark.sql.functions.{col, callUDF}
+import org.apache.spark.sql.functions.{col, udf}
 import org.apache.spark.sql.types.{IntegerType, StructType}
 import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.util.Utils
@@ -118,7 +118,8 @@ class KMeansModel private[ml] (
   }
 
   override def transform(dataset: DataFrame): DataFrame = {
-    dataset.withColumn($(predictionCol), callUDF(predict _, IntegerType, col($(featuresCol))))
+    val predictUDF = udf((vector: Vector) => predict(vector))
+    dataset.withColumn($(predictionCol), predictUDF(col($(featuresCol))))
   }
 
   override def transformSchema(schema: StructType): StructType = {