Skip to content

Commit

Permalink
[SPARK-16369][MLLIB] tallSkinnyQR of RowMatrix should be aware of empty …
Browse files Browse the repository at this point in the history
…partition

## What changes were proposed in this pull request?

tallSkinnyQR of RowMatrix should be aware of empty partitions, which could cause an exception from the Breeze QR decomposition.

See the [archived dev mail](https://mail-archives.apache.org/mod_mbox/spark-dev/201510.mbox/%3CCAF7ADNrycvPL3qX-VZJhq4OYmiUUhoscut_tkOm63Cm18iK1tQmail.gmail.com%3E) for more details.

## How was this patch tested?

Scala unit test.

Author: Xusen Yin <[email protected]>

Closes apache#14049 from yinxusen/SPARK-16369.
  • Loading branch information
yinxusen authored and srowen committed Jul 8, 2016
1 parent a54438c commit 255d74f
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ class RowMatrix @Since("1.0.0") (
def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
val col = numCols().toInt
// split rows horizontally into smaller matrices, and compute QR for each of them
val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows =>
val blockQRs = rows.retag(classOf[Vector]).glom().filter(_.length != 0).map { partRows =>
val bdm = BDM.zeros[Double](partRows.length, col)
var i = 0
partRows.foreach { row =>
Expand All @@ -548,10 +548,11 @@ class RowMatrix @Since("1.0.0") (
}

// combine the R part from previous results vertically into a tall matrix
val combinedR = blockQRs.treeReduce{ (r1, r2) =>
val combinedR = blockQRs.treeReduce { (r1, r2) =>
val stackedR = BDM.vertcat(r1, r2)
breeze.linalg.qr.reduced(stackedR).r
}

val finalR = Matrices.fromBreeze(combinedR.toDenseMatrix)
val finalQ = if (computeQ) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors}
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._

class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {

Expand Down Expand Up @@ -281,6 +282,22 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
assert(cov(i, j) === cov(j, i))
}
}

test("QR decomposition should aware of empty partition (SPARK-16369)") {
  // Baseline decomposition: every row of denseData sits in a single partition.
  val singlePartition: RowMatrix = new RowMatrix(sc.parallelize(denseData, 1))
  val expected = singlePartition.tallSkinnyQR(computeQ = true)

  // Same rows spread over 8 partitions.
  // NOTE(review): presumably denseData has fewer than 8 rows, so some partitions are
  // empty; denseData is defined outside this view, confirm against the suite.
  val manyPartitions = new RowMatrix(sc.parallelize(denseData, 8))
  val actual = manyPartitions.tallSkinnyQR(computeQ = true)

  // Q must have the same shape in both decompositions.
  assert(expected.Q.numCols() === actual.Q.numCols(), "Q matrix ncol not match")
  assert(expected.Q.numRows() === actual.Q.numRows(), "Q matrix nrow not match")

  // Q rows are compared pairwise, in collected order, within a relative tolerance.
  val expectedQRows = expected.Q.rows.collect()
  val actualQRows = actual.Q.rows.collect()
  expectedQRows.zip(actualQRows).foreach { case (lhs, rhs) =>
    assert(lhs ~== rhs relTol 1E-8, "Q matrix not match")
  }

  // R entries are compared element by element within the same relative tolerance.
  expected.R.toArray.zip(actual.R.toArray).foreach { case (lhs, rhs) =>
    assert(lhs ~== rhs relTol 1E-8, "R matrix not match")
  }
}
}

class RowMatrixClusterSuite extends SparkFunSuite with LocalClusterSparkContext {
Expand Down

0 comments on commit 255d74f

Please sign in to comment.