Skip to content

Commit

Permalink
Use udf, instead of callUDF
Browse files Browse the repository at this point in the history
  • Loading branch information
yu-iskw committed Jul 2, 2015
1 parent 4d2ad1e commit 19326f8
Showing 1 changed file with 3 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.mllib.clustering.{KMeans => MLlibKMeans, KMeansModel => MLlibKMeansModel}
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
import org.apache.spark.sql.functions.{col, callUDF}
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.{IntegerType, StructType}
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.util.Utils
Expand Down Expand Up @@ -118,7 +118,8 @@ class KMeansModel private[ml] (
}

override def transform(dataset: DataFrame): DataFrame = {
dataset.withColumn($(predictionCol), callUDF(predict _, IntegerType, col($(featuresCol))))
val predictUDF = udf((vector: Vector) => predict(vector))
dataset.withColumn($(predictionCol), predictUDF(col($(featuresCol))))
}

override def transformSchema(schema: StructType): StructType = {
Expand Down

0 comments on commit 19326f8

Please sign in to comment.