From e48a6e4f95f0f07e80ee63741fbf2ad546fe5919 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Tue, 27 Jan 2015 15:02:36 +0800 Subject: [PATCH 1/3] make latent svd computation constraint clear --- .../org/apache/spark/mllib/linalg/distributed/RowMatrix.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 02075edbabf85..d35ef3e1cd321 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -246,6 +246,8 @@ class RowMatrix( val G = computeGramianMatrix().toBreeze.asInstanceOf[BDM[Double]] EigenValueDecomposition.symmetricEigs(v => G * v, n, k, tol, maxIter) case SVDMode.LocalLAPACK => + //breeze (v0.10) svd latent constraint, 7 * n * n + 4 * n < Int.MaxValue + require(n < 17515, s"$n exceeds the breeze svd capability") val G = computeGramianMatrix().toBreeze.asInstanceOf[BDM[Double]] val brzSvd.SVD(uFull: BDM[Double], sigmaSquaresFull: BDV[Double], _) = brzSvd(G) (sigmaSquaresFull, uFull) From 23860e469d169e7e7da487dd58f8113785587d49 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Tue, 27 Jan 2015 15:57:18 +0800 Subject: [PATCH 2/3] fix comment style --- .../org/apache/spark/mllib/linalg/distributed/RowMatrix.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index d35ef3e1cd321..92d17adeccc42 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -246,7 +246,7 @@ class RowMatrix( val G = computeGramianMatrix().toBreeze.asInstanceOf[BDM[Double]] EigenValueDecomposition.symmetricEigs(v => G * v, n, k, tol, maxIter) case SVDMode.LocalLAPACK => - //breeze (v0.10) svd latent constraint, 7 * n * n + 4 * n < Int.MaxValue + // breeze (v0.10) svd latent constraint, 7 * n * n + 4 * n < Int.MaxValue require(n < 17515, s"$n exceeds the breeze svd capability") val G = computeGramianMatrix().toBreeze.asInstanceOf[BDM[Double]] val brzSvd.SVD(uFull: BDM[Double], sigmaSquaresFull: BDV[Double], _) = brzSvd(G) From f7864d003dd7f103704411bb1c4cbaf3dc55bb4c Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Thu, 29 Jan 2015 21:20:00 +0800 Subject: [PATCH 3/3] update auto logic for rowMatrix svd --- .../apache/spark/mllib/linalg/distributed/RowMatrix.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 92d17adeccc42..3587f726e5d9f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -220,8 +220,12 @@ class RowMatrix( val computeMode = mode match { case "auto" => + if(k > 5000) { + logWarning(s"computing svd with k=$k and n=$n, please check necessity") + } + // TODO: The conditions below are not fully tested. - if (n < 100 || k > n / 2) { + if (n < 100 || (k > n / 2 && n <= 15000)) { // If n is small or k is large compared with n, we better compute the Gramian matrix first // and then compute its eigenvalues locally, instead of making multiple passes. if (k < n / 3) {