Skip to content

Commit

Permalink
[MLlib] [SPARK-5301] Missing conversions and operations on IndexedRow…
Browse files Browse the repository at this point in the history
…Matrix and CoordinateMatrix

* Transpose is missing from CoordinateMatrix (this is cheap to compute, so it should be there)
* IndexedRowMatrix should be convertable to CoordinateMatrix (conversion added)

Tests for both added.

Author: Reza Zadeh <[email protected]>

Closes apache#4089 from rezazadeh/matutils and squashes the following commits:

ec5238b [Reza Zadeh] Array -> Iterator to avoid temp array
3ce0b5d [Reza Zadeh] Array -> Iterator
bbc907a [Reza Zadeh] Use 'i' for index, and zipWithIndex
cb10ae5 [Reza Zadeh] remove unnecessary import
a7ae048 [Reza Zadeh] Missing linear algebra utilities
  • Loading branch information
Reza Zadeh authored and mengxr committed Jan 21, 2015
1 parent 2eeada3 commit aa1e22b
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ class CoordinateMatrix(
nRows
}

/** Transposes this CoordinateMatrix. */
def transpose(): CoordinateMatrix = {
new CoordinateMatrix(entries.map(x => MatrixEntry(x.j, x.i, x.value)), numCols(), numRows())
}

/** Converts to IndexedRowMatrix. The number of columns must be within the integer range. */
def toIndexedRowMatrix(): IndexedRowMatrix = {
val nl = numCols()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,23 @@ class IndexedRowMatrix(
new RowMatrix(rows.map(_.vector), 0L, nCols)
}

/**
* Converts this matrix to a
* [[org.apache.spark.mllib.linalg.distributed.CoordinateMatrix]].
*/
def toCoordinateMatrix(): CoordinateMatrix = {
val entries = rows.flatMap { row =>
val rowIndex = row.index
row.vector match {
case SparseVector(size, indices, values) =>
Iterator.tabulate(indices.size)(i => MatrixEntry(rowIndex, indices(i), values(i)))
case DenseVector(values) =>
Iterator.tabulate(values.size)(i => MatrixEntry(rowIndex, i, values(i)))
}
}
new CoordinateMatrix(entries, numRows(), numCols())
}

/**
* Computes the singular value decomposition of this IndexedRowMatrix.
* Denote this matrix by A (m x n), this will compute matrices U, S, V such that A = U * S * V'.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ class CoordinateMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(mat.toBreeze() === expected)
}

test("transpose") {
val transposed = mat.transpose()
assert(mat.toBreeze().t === transposed.toBreeze())
}

test("toIndexedRowMatrix") {
val indexedRowMatrix = mat.toIndexedRowMatrix()
val expected = BDM(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ class IndexedRowMatrixSuite extends FunSuite with MLlibTestSparkContext {
assert(rowMat.rows.collect().toSeq === data.map(_.vector).toSeq)
}

test("toCoordinateMatrix") {
val idxRowMat = new IndexedRowMatrix(indexedRows)
val coordMat = idxRowMat.toCoordinateMatrix()
assert(coordMat.numRows() === m)
assert(coordMat.numCols() === n)
assert(coordMat.toBreeze() === idxRowMat.toBreeze())
}

test("multiply a local matrix") {
val A = new IndexedRowMatrix(indexedRows)
val B = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
Expand Down

0 comments on commit aa1e22b

Please sign in to comment.