Skip to content

Commit

Permalink
Rework sparse matmul to a simpler algorithm as per #942
Browse files Browse the repository at this point in the history
and https://gist.github.com/2993335, with a few modifications to
handle growing, trimming upon completion, and loop ordering, which
seems to make the tests pass.

This code gives a 30% speedup if all LLVM optimizations in codegen.cpp
are turned on.

@doobwa, thanks for the simpler implementation. Sorry about being unable
to merge your pull request due to github acting up.
  • Loading branch information
ViralBShah committed Jun 29, 2012
1 parent 62c1fd0 commit a18880a
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 22 deletions.
70 changes: 48 additions & 22 deletions extras/linalg_sparse.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
## linalg_sparse.jl: Basic Linear Algebra functions for sparse representations ##

require("sparse.jl")

#TODO? probably there's no use at all for these
# dot(x::SparseAccumulator, y::SparseAccumulator)
# cross(a::SparseAccumulator, b::SparseAccumulator) =
Expand Down Expand Up @@ -54,34 +56,58 @@ function (*){T1,T2}(X::Matrix{T1}, A::SparseMatrixCSC{T2})
return Y
end

# sparse matmul (sparse * sparse)
function (*){TvX,TiX,TvY,TiY}(X::SparseMatrixCSC{TvX,TiX}, Y::SparseMatrixCSC{TvY,TiY})
mX, nX = size(X)
mY, nY = size(Y)
if nX != mY; error("error in *: mismatched dimensions"); end
# Sparse matrix multiplication as described in [Gustavson, 1978]:
# http://www.cse.iitb.ac.in/graphics/~anand/website/include/papers/matrix/fast_matrix_mul.pdf

function (*){TvX,TiX,TvY,TiY}(A::SparseMatrixCSC{TvX,TiX}, B::SparseMatrixCSC{TvY,TiY})
mA, nA = size(A)
mB, nB = size(B)
if nA != mB; error("mismatched dimensions"); end
Tv = promote_type(TvX, TvY)
Ti = promote_type(TiX, TiY)

colptr = Array(Ti, nY+1)
colptr[1] = 1
nnz_res = nnz(X) + nnz(Y)
rowval = Array(Ti, nnz_res) # TODO: Need better estimation of result space
nzval = Array(Tv, nnz_res)

colptrY = Y.colptr
rowvalY = Y.rowval
nzvalY = Y.nzval

spa = SparseAccumulator(Tv, Ti, mX);
for y_col = 1:nY
for y_elt = colptrY[y_col] : (colptrY[y_col+1]-1)
x_col = rowvalY[y_elt]
_jl_spa_axpy(spa, nzvalY[y_elt], X, x_col)
colptrA = A.colptr; rowvalA = A.rowval; nzvalA = A.nzval
colptrB = B.colptr; rowvalB = B.rowval; nzvalB = B.nzval
# TODO: Need better estimation of result space
nnzC = min(mA*nB, length(nzvalA) + length(nzvalB))
colptrC = Array(Ti, nB+1)
rowvalC = Array(Ti, nnzC)
nzvalC = Array(Tv, nnzC)

ip = 1
xb = zeros(Ti, mA)
x = zeros(Tv, mA)
for i in 1:nB
if ip + mA > nnzC
rowvalC = grow(rowvalC, 2*nnzC)
nzvalC = grow(nzvalC, 2*nnzC)
end
colptrC[i] = ip
for jp in colptrB[i]:(colptrB[i+1] - 1)
j = rowvalB[jp]
nzB = nzvalB[jp]
for kp in colptrA[j]:(colptrA[j+1] - 1)
k = rowvalA[kp]
nzA = nzvalA[kp]
if xb[k] != i
rowvalC[ip] = k
ip += 1
xb[k] = i
x[k] = nzA * nzB
else
x[k] += nzA * nzB
end
end
end
for vp in colptrC[i]:(ip - 1)
nzvalC[vp] = x[rowvalC[vp]]
end
(rowval, nzval) = _jl_spa_store_reset(spa, y_col, colptr, rowval, nzval)
end
colptrC[nB+1] = ip

SparseMatrixCSC(mX, nY, colptr, rowval, nzval)
rowvalC = del(rowvalC, colptrC[end]:length(rowvalC))
nzvalC = del(nzvalC, colptrC[end]:length(nzvalC))
return SparseMatrixCSC(mA,nB,colptrC,rowvalC,nzvalC)
end

## triu, tril
Expand Down
2 changes: 2 additions & 0 deletions extras/linalg_suitesparse.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require("linalg_sparse.jl")

type MatrixIllConditionedException <: Exception end

function _jl_cholmod_transpose{Tv<:Union(Float64,Complex128)}(S::SparseMatrixCSC{Tv})
Expand Down

0 comments on commit a18880a

Please sign in to comment.