diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index f0f6f3cfea1ba..54ee930d61003 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -212,10 +212,8 @@ private[mllib] object BLAS extends Serializable with Logging { /** * C := alpha * A * B + beta * C - * @param transA specify whether to use matrix A, or the transpose of matrix A. Should be "N" or - * "n" to use A, and "T" or "t" to use the transpose of A. - * @param transB specify whether to use matrix B, or the transpose of matrix B. Should be "N" or - * "n" to use B, and "T" or "t" to use the transpose of B. + * @param transA whether to use the transpose of matrix A (true), or A itself (false). + * @param transB whether to use the transpose of matrix B (true), or B itself (false). * @param alpha a scalar to scale the multiplication A * B. * @param A the matrix A that will be left multiplied to B. Size of m x k. * @param B the matrix B that will be left multiplied by A. Size of k x n. @@ -231,7 +229,7 @@ private[mllib] object BLAS extends Serializable with Logging { beta: Double, C: DenseMatrix): Unit = { if (alpha == 0.0) { - logWarning("gemm: alpha is equal to 0. Returning C.") + logDebug("gemm: alpha is equal to 0. Returning C.") } else { A match { case sparse: SparseMatrix => @@ -319,7 +317,7 @@ private[mllib] object BLAS extends Serializable with Logging { // Slicing is easy in this case. 
This is the optimal multiplication setting for sparse matrices if (transA){ var colCounterForB = 0 - if (!transB){ // Expensive to put the check inside the loop + if (!transB) { // Expensive to put the check inside the loop while (colCounterForB < nB) { var rowCounterForA = 0 val Cstart = colCounterForB * mA @@ -360,7 +358,7 @@ private[mllib] object BLAS extends Serializable with Logging { } else { // Scale matrix first if `beta` is not equal to 0.0 if (beta != 0.0){ - nativeBLAS.dscal(C.values.length, beta, C.values, 1) + f2jBLAS.dscal(C.values.length, beta, C.values, 1) } // Perform matrix multiplication and add to C. The rows of A are multiplied by the columns of // B, and added to C. @@ -368,13 +366,14 @@ private[mllib] object BLAS extends Serializable with Logging { if (!transB) { // Expensive to put the check inside the loop while (colCounterForB < nB) { var colCounterForA = 0 // The column of A to multiply with the row of B - while (colCounterForA < kA){ + val Bstart = colCounterForB * kB + val Cstart = colCounterForB * mA + while (colCounterForA < kA) { var i = Acols(colCounterForA) val indEnd = Acols(colCounterForA + 1) - val Bval = B(colCounterForA, colCounterForB) - val Cstart = colCounterForB * mA + val Bval = B.values(Bstart + colCounterForA) * alpha while (i < indEnd){ - C.values(Cstart + Arows(i)) += Avals(i) * Bval * alpha + C.values(Cstart + Arows(i)) += Avals(i) * Bval i += 1 } colCounterForA += 1 @@ -384,13 +383,13 @@ private[mllib] object BLAS extends Serializable with Logging { } else { while (colCounterForB < nB) { var colCounterForA = 0 // The column of A to multiply with the row of B + val Cstart = colCounterForB * mA while (colCounterForA < kA){ var i = Acols(colCounterForA) val indEnd = Acols(colCounterForA + 1) - val Bval = B(colCounterForB, colCounterForA) - val Cstart = colCounterForB * mA + val Bval = B(colCounterForB, colCounterForA) * alpha while (i < indEnd){ - C.values(Cstart + Arows(i)) += Avals(i) * Bval * alpha + C.values(Cstart 
+ Arows(i)) += Avals(i) * Bval i += 1 } colCounterForA += 1 @@ -403,8 +402,7 @@ private[mllib] object BLAS extends Serializable with Logging { /** * y := alpha * A * x + beta * y - * @param trans specify whether to use matrix A, or the transpose of matrix A. Should be "N" or - * "n" to use A, and "T" or "t" to use the transpose of A. + * @param trans whether to use the transpose of matrix A (true), or A itself (false). * @param alpha a scalar to scale the multiplication A * x. * @param A the matrix A that will be left multiplied to x. Size of m x n. * @param x the vector x that will be left multiplied by A. Size of n x 1. @@ -427,7 +425,7 @@ private[mllib] object BLAS extends Serializable with Logging { require(mA == y.size, s"The rows of A don't match the number of elements of y. A: $mA, y:${y.size}}") if (alpha == 0.0) { - logWarning("gemv: alpha is equal to 0. Returning y.") + logDebug("gemv: alpha is equal to 0. Returning y.") } else { A match { case sparse: SparseMatrix => @@ -458,47 +456,6 @@ private[mllib] object BLAS extends Serializable with Logging { gemv(false, alpha, A, x, beta, y) } - /** - * y := alpha * A * x - * - * @param trans specify whether to use matrix A, or the transpose of matrix A. Should be "N" or - * "n" to use A, and "T" or "t" to use the transpose of A. - * @param alpha a scalar to scale the multiplication A * x. - * @param A the matrix A that will be left multiplied to x. Size of m x n. - * @param x the vector x that will be left multiplied by A. Size of n x 1. - * - * @return `DenseVector` y, the result of the matrix-vector multiplication. Size of m x 1. - */ - def gemv( - trans: Boolean, - alpha: Double, - A: Matrix, - x: DenseVector): DenseVector = { - val m = if(!trans) A.numRows else A.numCols - - val y: DenseVector = new DenseVector(Array.fill(m)(0.0)) - gemv(trans, alpha, A, x, 0.0, y) - - y - } - - /** - * y := alpha * A * x - * - * @param alpha a scalar to scale the multiplication A * x. 
- * @param A the matrix A that will be left multiplied to x. Size of m x n. - * @param x the vector x that will be left multiplied by A. Size of n x 1. - * - * @return `DenseVector` y, the result of the matrix-vector multiplication. Size of m x 1. - */ - def gemv( - alpha: Double, - A: Matrix, - x: DenseVector): DenseVector = { - gemv(false, alpha, A, x) - } - - /** * y := alpha * A * x + beta * y * For `DenseMatrix` A. @@ -539,8 +496,9 @@ private[mllib] object BLAS extends Serializable with Logging { var rowCounter = 0 while (rowCounter < mA){ var i = Arows(rowCounter) + val indEnd = Arows(rowCounter + 1) var sum = 0.0 - while(i < Arows(rowCounter + 1)){ + while(i < indEnd){ sum += Avals(i) * x.values(Acols(i)) i += 1 } @@ -556,9 +514,11 @@ private[mllib] object BLAS extends Serializable with Logging { var colCounterForA = 0 while (colCounterForA < nA){ var i = Acols(colCounterForA) - while (i < Acols(colCounterForA + 1)){ + val indEnd = Acols(colCounterForA + 1) + val xVal = x.values(colCounterForA) * alpha + while (i < indEnd){ val rowIndex = Arows(i) - y.values(rowIndex) += Avals(i) * x.values(colCounterForA) * alpha + y.values(rowIndex) += Avals(i) * xVal i += 1 } colCounterForA += 1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 7f26e6dd55121..5711532abcf80 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -18,8 +18,11 @@ package org.apache.spark.mllib.linalg import breeze.linalg.{Matrix => BM, DenseMatrix => BDM, CSCMatrix => BSM} + import org.apache.spark.util.random.XORShiftRandom +import java.util.Arrays + /** * Trait for a local matrix. */ @@ -37,9 +40,6 @@ sealed trait Matrix extends Serializable { /** Converts to a breeze matrix. */ private[mllib] def toBreeze: BM[Double] - /** Gets the i-th element in the array backing the matrix. 
*/ - private[mllib] def apply(i: Int): Double - /** Gets the (i, j)-th element. */ private[mllib] def apply(i: Int, j: Int): Double @@ -47,30 +47,38 @@ sealed trait Matrix extends Serializable { private[mllib] def index(i: Int, j: Int): Int /** Update element at (i, j) */ - private[mllib] def update(i: Int, j: Int, v: Double) + private[mllib] def update(i: Int, j: Int, v: Double): Unit /** Get a deep copy of the matrix. */ def copy: Matrix /** Convenience method for `Matrix`-`DenseMatrix` multiplication. */ - def times(y: DenseMatrix): DenseMatrix = { + def multiply(y: DenseMatrix): DenseMatrix = { val C: DenseMatrix = Matrices.zeros(numRows, y.numCols).asInstanceOf[DenseMatrix] BLAS.gemm(false, false, 1.0, this, y, 0.0, C) C } /** Convenience method for `Matrix`-`DenseVector` multiplication. */ - def times(y: DenseVector): DenseVector = BLAS.gemv(1.0, this, y) + def multiply(y: DenseVector): DenseVector = { + val output = new DenseVector(new Array[Double](numRows)) + BLAS.gemv(1.0, this, y, 0.0, output) + output + } /** Convenience method for `Matrix`^T^-`DenseMatrix` multiplication. */ - def transposeTimes(y: DenseMatrix): DenseMatrix = { + def transposeMultiply(y: DenseMatrix): DenseMatrix = { val C: DenseMatrix = Matrices.zeros(numCols, y.numCols).asInstanceOf[DenseMatrix] BLAS.gemm(true, false, 1.0, this, y, 0.0, C) C } /** Convenience method for `Matrix`^T^-`DenseVector` multiplication. 
*/ - def transposeTimes(y: DenseVector): DenseVector = BLAS.gemv(true, 1.0, this, y) + def transposeMultiply(y: DenseVector): DenseVector = { + val output = new DenseVector(new Array[Double](numCols)) + BLAS.gemv(true, 1.0, this, y, 0.0, output) + output + } /** A human readable representation of the matrix */ override def toString: String = toBreeze.toString() @@ -106,7 +114,7 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) private[mllib] def index(i: Int, j: Int): Int = i + numRows * j - private[mllib] def update(i: Int, j: Int, v: Double){ + private[mllib] def update(i: Int, j: Int, v: Double): Unit = { values(index(i, j)) = v } @@ -128,7 +136,8 @@ class DenseMatrix(val numRows: Int, val numCols: Int, val values: Array[Double]) * @param numRows number of rows * @param numCols number of columns * @param colPtrs the index corresponding to the start of a new column - * @param rowIndices the row index of the entry + * @param rowIndices the row index of the entry. 
They must be in strictly increasing order for each + * column * @param values non-zero matrix entries in column major */ class SparseMatrix( @@ -145,7 +154,7 @@ class SparseMatrix( s"numCols: $numCols") override def toArray: Array[Double] = { - val arr = Array.fill(numRows * numCols)(0.0) + val arr = new Array[Double](numRows * numCols) var j = 0 while (j < numCols) { var i = colPtrs(j) @@ -164,35 +173,20 @@ class SparseMatrix( private[mllib] def toBreeze: BM[Double] = new BSM[Double](values, numRows, numCols, colPtrs, rowIndices) - private[mllib] def apply(i: Int): Double = values(i) - private[mllib] def apply(i: Int, j: Int): Double = { val ind = index(i, j) - if (ind == -1) 0.0 else values(ind) + if (ind < 0) 0.0 else values(ind) } private[mllib] def index(i: Int, j: Int): Int = { - var regionStart = colPtrs(j) - var regionEnd = colPtrs(j + 1) - while (regionStart <= regionEnd) { - val mid = (regionStart + regionEnd) / 2 - if (rowIndices(mid) == i){ - return mid - } else if (regionStart == regionEnd) { - return -1 - } else if (rowIndices(mid) > i) { - regionEnd = mid - } else { - regionStart = mid - } - } - -1 + Arrays.binarySearch(rowIndices, colPtrs(j), colPtrs(j + 1), i) } - private[mllib] def update(i: Int, j: Int, v: Double){ + private[mllib] def update(i: Int, j: Int, v: Double): Unit = { val ind = index(i, j) - if (ind == -1){ - throw new IllegalArgumentException("The given row and column indices correspond to a zero " + + // Arrays.binarySearch encodes a miss as -(insertionPoint) - 1, so test for any negative index. + if (ind < 0) { + throw new NoSuchElementException("The given row and column indices correspond to a zero " + "value. 
Only non-zero elements in Sparse Matrices can be updated.") } else { - values(index(i, j)) = v + values(ind) = v @@ -223,17 +217,17 @@ object Matrices { * * @param numRows number of rows * @param numCols number of columns - * @param colPointers the index corresponding to the start of a new column + * @param colPtrs the index corresponding to the start of a new column * @param rowIndices the row index of the entry * @param values non-zero matrix entries in column major */ def sparse( numRows: Int, numCols: Int, - colPointers: Array[Int], + colPtrs: Array[Int], rowIndices: Array[Int], values: Array[Double]): Matrix = { - new SparseMatrix(numRows, numCols, colPointers, rowIndices, values) + new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values) } /** @@ -262,7 +256,7 @@ object Matrices { * @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros */ def zeros(numRows: Int, numCols: Int): Matrix = - new DenseMatrix(numRows, numCols, Array.fill(numRows * numCols)(0.0)) + new DenseMatrix(numRows, numCols, new Array[Double](numRows * numCols)) /** * Generate a `DenseMatrix` consisting of ones. 
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index 6ca02f9e9674b..5d70c914f14b0 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -136,8 +136,8 @@ class BLASSuite extends FunSuite { val B = new DenseMatrix(3, 2, Array(1.0, 0.0, 0.0, 0.0, 2.0, 1.0)) val expected = new DenseMatrix(4, 2, Array(0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 3.0)) - assert(dA times B ~== expected absTol 1e-15) - assert(sA times B ~== expected absTol 1e-15) + assert(dA multiply B ~== expected absTol 1e-15) + assert(sA multiply B ~== expected absTol 1e-15) val C1 = new DenseMatrix(4, 2, Array(1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0)) val C2 = C1.copy @@ -170,8 +170,8 @@ class BLASSuite extends FunSuite { val sAT = new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - assert(dAT transposeTimes B ~== expected absTol 1e-15) - assert(sAT transposeTimes B ~== expected absTol 1e-15) + assert(dAT transposeMultiply B ~== expected absTol 1e-15) + assert(sAT transposeMultiply B ~== expected absTol 1e-15) gemm(true, false, 1.0, dAT, B, 2.0, C5) gemm(true, false, 1.0, sAT, B, 2.0, C6) @@ -181,7 +181,6 @@ class BLASSuite extends FunSuite { assert(C6 ~== expected2 absTol 1e-15) assert(C7 ~== expected3 absTol 1e-15) assert(C8 ~== expected3 absTol 1e-15) - } test("gemv") { @@ -193,8 +192,8 @@ class BLASSuite extends FunSuite { val x = new DenseVector(Array(1.0, 2.0, 3.0)) val expected = new DenseVector(Array(4.0, 1.0, 2.0, 9.0)) - assert(dA times x ~== expected absTol 1e-15) - assert(sA times x ~== expected absTol 1e-15) + assert(dA multiply x ~== expected absTol 1e-15) + assert(sA multiply x ~== expected absTol 1e-15) val y1 = new DenseVector(Array(1.0, 3.0, 1.0, 0.0)) val y2 = y1.copy @@ -226,8 +225,8 @@ class BLASSuite extends FunSuite { val sAT = new SparseMatrix(3, 4, 
Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0)) - assert(dAT transposeTimes x ~== expected absTol 1e-15) - assert(sAT transposeTimes x ~== expected absTol 1e-15) + assert(dAT transposeMultiply x ~== expected absTol 1e-15) + assert(sAT transposeMultiply x ~== expected absTol 1e-15) gemv(true, 1.0, dAT, x, 2.0, y5) gemv(true, 1.0, sAT, x, 2.0, y6) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala index 8dd5b2caefe0b..73a6d3a27d868 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeMatrixConversionSuite.scala @@ -40,9 +40,9 @@ class BreezeMatrixConversionSuite extends FunSuite { test("sparse matrix to breeze") { val values = Array(1.0, 2.0, 4.0, 5.0) - val colPointers = Array(0, 2, 4) + val colPtrs = Array(0, 2, 4) val rowIndices = Array(1, 2, 1, 2) - val mat = Matrices.sparse(3, 2, colPointers, rowIndices, values) + val mat = Matrices.sparse(3, 2, colPtrs, rowIndices, values) val breeze = mat.toBreeze.asInstanceOf[BSM[Double]] assert(breeze.rows === mat.numRows) assert(breeze.cols === mat.numCols) @@ -51,9 +51,9 @@ class BreezeMatrixConversionSuite extends FunSuite { test("sparse breeze matrix to sparse matrix") { val values = Array(1.0, 2.0, 4.0, 5.0) - val colPointers = Array(0, 2, 4) + val colPtrs = Array(0, 2, 4) val rowIndices = Array(1, 2, 1, 2) - val breeze = new BSM[Double](values, 3, 2, colPointers, rowIndices) + val breeze = new BSM[Double](values, 3, 2, colPtrs, rowIndices) val mat = Matrices.fromBreeze(breeze).asInstanceOf[SparseMatrix] assert(mat.numRows === breeze.rows) assert(mat.numCols === breeze.cols) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 2d33559b96091..5f8b8c4b72697 
100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -41,20 +41,22 @@ class MatricesSuite extends FunSuite { val m = 3 val n = 2 val values = Array(1.0, 2.0, 4.0, 5.0) - val colIndices = Array(0, 2, 4) + val colPtrs = Array(0, 2, 4) val rowIndices = Array(1, 2, 1, 2) - val mat = Matrices.sparse(m, n, colIndices, rowIndices, values).asInstanceOf[SparseMatrix] + val mat = Matrices.sparse(m, n, colPtrs, rowIndices, values).asInstanceOf[SparseMatrix] assert(mat.numRows === m) assert(mat.numCols === n) assert(mat.values.eq(values), "should not copy data") + assert(mat.colPtrs.eq(colPtrs), "should not copy data") + assert(mat.rowIndices.eq(rowIndices), "should not copy data") } test("sparse matrix construction with wrong number of elements") { - intercept[RuntimeException] { + intercept[IllegalArgumentException] { Matrices.sparse(3, 2, Array(0, 1), Array(1, 2, 1), Array(0.0, 1.0, 2.0)) } - intercept[RuntimeException] { + intercept[IllegalArgumentException] { Matrices.sparse(3, 2, Array(0, 1, 2), Array(1, 2), Array(0.0, 1.0, 2.0)) } } @@ -69,9 +71,9 @@ class MatricesSuite extends FunSuite { assert(!denseMat.toArray.eq(denseCopy.toArray)) val values = Array(1.0, 2.0, 4.0, 5.0) - val colIndices = Array(0, 2, 4) + val colPtrs = Array(0, 2, 4) val rowIndices = Array(1, 2, 1, 2) - val sparseMat = Matrices.sparse(m, n, colIndices, rowIndices, values) + val sparseMat = Matrices.sparse(m, n, colPtrs, rowIndices, values) val sparseCopy = sparseMat.copy assert(!sparseMat.toArray.eq(sparseCopy.toArray)) @@ -84,31 +86,30 @@ class MatricesSuite extends FunSuite { val denseMat = new DenseMatrix(m, n, allValues) - assert(denseMat(0, 1) == 3.0) - assert(denseMat(0, 1) == denseMat.values(3)) - assert(denseMat(0, 1) == denseMat(3)) - assert(denseMat(0, 0) == 0.0) + assert(denseMat(0, 1) === 3.0) + assert(denseMat(0, 1) === denseMat.values(3)) + assert(denseMat(0, 1) === 
denseMat(3)) + assert(denseMat(0, 0) === 0.0) denseMat.update(0, 0, 10.0) - assert(denseMat(0, 0) == 10.0) - assert(denseMat.values(0) == 10.0) + assert(denseMat(0, 0) === 10.0) + assert(denseMat.values(0) === 10.0) val sparseValues = Array(1.0, 2.0, 3.0, 4.0) - val colIndices = Array(0, 2, 4) + val colPtrs = Array(0, 2, 4) val rowIndices = Array(1, 2, 0, 1) - val sparseMat = new SparseMatrix(m, n, colIndices, rowIndices, sparseValues) + val sparseMat = new SparseMatrix(m, n, colPtrs, rowIndices, sparseValues) - assert(sparseMat(0, 1) == 3.0) - assert(sparseMat(0, 1) == sparseMat.values(2)) - assert(sparseMat(0, 1) == sparseMat(2)) - assert(sparseMat(0, 0) == 0.0) + assert(sparseMat(0, 1) === 3.0) + assert(sparseMat(0, 1) === sparseMat.values(2)) + assert(sparseMat(0, 0) === 0.0) - intercept[IllegalArgumentException] { + intercept[NoSuchElementException] { sparseMat.update(0, 0, 10.0) } sparseMat.update(0, 1, 10.0) - assert(sparseMat(0, 1) == 10.0) - assert(sparseMat.values(2) == 10.0) + assert(sparseMat(0, 1) === 10.0) + assert(sparseMat.values(2) === 10.0) } }