From c1a8e16ac9dc301d9eafca44100c5a857de903d5 Mon Sep 17 00:00:00 2001
From: Travis Galoppo
Date: Tue, 11 Nov 2014 18:31:44 -0500
Subject: [PATCH] Made GaussianMixtureModel class serializable

Modified sum function for better performance
---
 .../spark/mllib/clustering/GMMExpectationMaximization.scala  | 2 +-
 .../apache/spark/mllib/clustering/GaussianMixtureModel.scala | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GMMExpectationMaximization.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GMMExpectationMaximization.scala
index 9b4d5de65f200..0ff499eec7b9d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GMMExpectationMaximization.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GMMExpectationMaximization.scala
@@ -201,7 +201,7 @@ class GMMExpectationMaximization private (
   /** Sum the values in array of doubles */
   private def sum(x : Array[Double]) : Double = {
     var s : Double = 0.0
-    x.foreach(u => s += u)
+    (0 until x.length).foreach(j => s += x(j))
     s
   }
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index b36123366c9a8..072ed86edebe4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -25,7 +25,10 @@ import org.apache.spark.mllib.linalg.Vector
  * from each Gaussian i=1..k with probability w(i); mu(i) and sigma(i) are the respective
  * mean and covariance for each Gaussian distribution i=1..k.
  */
-class GaussianMixtureModel(val w: Array[Double], val mu: Array[Vector], val sigma: Array[Matrix]) {
+class GaussianMixtureModel(
+  val w: Array[Double],
+  val mu: Array[Vector],
+  val sigma: Array[Matrix]) extends Serializable {
 
   /** Number of gaussians in mixture */
   def k: Int = w.length;