Skip to content

Commit

Permalink
Add fastRG.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Yun-Jhong Wu committed Mar 18, 2017
1 parent c971dae commit b5f102a
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 1 deletion.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# Matrix routines

- `ccnn.py`: Zhang, Y., Liang, P., & Wainwright, M. J. (2016). Convexified convolutional neural networks. *arXiv preprint* arXiv:1609.01000.
- `cur.py`: Mahoney, M. W., & Drineas, P. (2009). CUR matrix decompositions for improved data analysis. *Proceedings of the National Academy of Sciences*, 106(3), 697-702.
- `fastRG.py`: Rohe, K., Tao, J., Han, X., & Binkiewicz, N. (2017). A note on quickly sampling a sparse matrix with low rank expectation. *arXiv preprint* arXiv:1703.02998.
- `linear_time_svd`: Drineas, P., Kannan, R., & Mahoney, M. W. (2006). Fast Monte Carlo algorithms for matrices II: Computing a low-rank approximation to a matrix. *SIAM Journal on computing*, 36(1), 158-183.
- `matrix_completion.py`: Lin, Z., Chen, M., & Ma, Y. (2010). The augmented lagrange multiplier method for exact recovery of corrupted low-rank matrices. *arXiv preprint* arXiv:1009.5055.
- `neighborhood_smoothing.py`: Zhang, Y., Levina, E. and Zhu, J. (2016) Estimating neighborhood edge probabilities by neighborhood smoothing. *arXiv preprint* arXiv:1509.08588.
- `parafac2.py`: Kiers, H. A., Ten Berge, J. M., & Bro, R. (1999). PARAFAC2-Part I. A direct fitting algorithm for the PARAFAC2 model. *Journal of Chemometrics*, 13(3-4), 275-294.
- `robust_pca.py`: Wright, J., Ganesh, A., Rao, S., Peng, Y., & Ma, Y. (2009). Robust principal component analysis: Exact recovery of corrupted low-rank matrices via convex optimization. *Advances in neural information processing systems*, 2080-2088.
- `randomized_svd.java`: Halko, N., Martinsson, P. G., & Tropp, J. A. (2011). Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions. *SIAM review*, 53(2), 217-288.
- `regularized_matrix_regression.py`: Zhou, H., & Li, L. (2014). Regularized matrix regression. *Journal of the Royal Statistical Society: Series B (Statistical Methodology)*, 76(2), 463-483.
- `ccnn.py`: Zhang, Y., Liang, P., & Wainwright, M. J. (2016). Convexified convolutional neural networks. *arXiv preprint* arXiv:1609.01000.

130 changes: 130 additions & 0 deletions fastRG.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 17 15:18:34 2017
Author: Yun-Jhong Wu
E-mail: [email protected]
"""

import numpy as np
from scipy.sparse import csc_matrix

def howManyEdges(X, S, Y=None):
    """
    Expected edge count and expected average degree for a fastRG model.

    With column sums Cx = colsum(X) and Cy = colsum(Y), the expected number
    of edges is Cx @ S @ Cy; the average degree divides that by the number
    of rows of X. When Y is omitted the graph is square and Y = X.
    """
    if Y is None:
        Y = X

    col_sums_x = np.sum(X, axis=0)
    col_sums_y = np.sum(Y, axis=0)

    # (S * col_sums_y) scales column j of S by Cy[j]; row-summing gives S @ Cy.
    expected_edges = col_sums_x @ np.sum(S * col_sums_y, axis=1)
    expected_avg_degree = expected_edges / X.shape[0]

    return expected_edges, expected_avg_degree


def fastRG(X, S, Y=None, avgDeg=None, simple=None, PoissonEdges=True,
           directed=False, selfLoops=False, returnEdgeList=False,
           returnParameters=False):
    """
    Sample a sparse random graph whose expected adjacency matrix is
    proportional to X @ S @ Y.T.

    Rohe, K., Tao, J., Han, X., & Binkiewicz, N. (2017). A note on quickly
    sampling a sparse matrix with low rank expectation. arXiv preprint
    arXiv:1703.02998.
    Python port of the reference R implementation:
    https://github.com/karlrohe/fastRG

    Parameters
    ----------
    X : (n, K1) nonnegative array.
    S : (K1, K2) nonnegative block-intensity array. Never modified in place.
    Y : (d, K2) nonnegative array or None. Supplying Y samples a bipartite
        (rectangular) graph and forces directed=True, selfLoops=True,
        simple=False.
    avgDeg : if given, S is rescaled (on a copy) so the expected average
        degree equals avgDeg.
    simple : if truthy, force an undirected simple graph (no self-loops,
        no multi-edges).
    PoissonEdges : if False, collapse duplicate edges to single edges.
    directed, selfLoops : graph flavor switches.
    returnEdgeList : return the (m, 2) integer edge array instead of a
        sparse matrix.
    returnParameters : return (A, X, S, Y) instead of A alone.

    Returns
    -------
    scipy.sparse.csc_matrix of shape (n, d), an edge list, or a tuple as
    described above; None if any entry of X, S, or Y is negative.
    """

    if Y is not None and Y.size > 0:
        # A second factor makes the graph bipartite/directed by construction.
        directed = True
        selfLoops = True
        simple = False
        returnY = True
    else:
        Y = X
        returnY = False

    if np.any(X < 0) or np.any(S < 0) or np.any(Y < 0):
        return None

    if simple:  # None and False are both falsy: same behavior, simpler test
        selfLoops = False
        directed = False
        PoissonEdges = False

    n, K1 = X.shape
    d, K2 = Y.shape

    if avgDeg is not None:
        _, eDbar = howManyEdges(X, S, Y)
        # Rescale into a NEW array: `S *= ...` would mutate the caller's S
        # and would fail outright for integer-dtype S.
        S = S * (avgDeg / eDbar)

    if not directed:
        S = (S + S.T) * 0.25

    Cx = np.sum(X, axis=0)
    Cy = np.sum(Y, axis=0)
    # Column-normalize: row u of Xt / row v of Yt is a probability vector
    # over the n (resp. d) nodes. Assumes no all-zero column — TODO confirm.
    Xt = (X * (1 / Cx)).T
    Yt = (Y * (1 / Cy)).T

    St = Cx[:, None] * S * Cy
    m = np.random.poisson(np.sum(St))

    if m == 0:
        if returnEdgeList:
            # Keep the contract consistent: an edge list was requested,
            # so return an empty one, not a sparse matrix.
            return np.empty((0, 2), dtype=np.int64)
        A = csc_matrix((n, d))
        return (A, X, S, Y if returnY else None) if returnParameters else A

    # Number of sampled edges landing in each (u, v) block pair.
    tabUV = np.random.multinomial(m, pvals=St.ravel() * (1 / np.sum(St))).reshape((K1, K2))

    # Integer buffers: these hold node indices fed to csc_matrix.
    elist = np.empty((2, m), dtype=np.int64)
    eitmp = np.empty(m, dtype=np.int64)

    blockDegreesU = np.sum(tabUV, axis=1)
    tickerU = np.insert(np.cumsum(blockDegreesU), 0, 0)

    for u in range(K1):
        if blockDegreesU[u] > 0:
            elist[0, tickerU[u]:tickerU[u+1]] = np.random.choice(n, size=blockDegreesU[u],
                                                                 replace=True, p=Xt[u])

    blockDegreesV = np.sum(tabUV, axis=0)
    tickerV = np.insert(np.cumsum(blockDegreesV), 0, 0)

    for v in range(K2):
        if blockDegreesV[v] > 0:
            # Column endpoints live in [0, d), not [0, n): Yt[v] has length d,
            # so sampling from n would crash whenever n != d.
            eitmp[tickerV[v]:tickerV[v+1]] = np.random.choice(d, size=blockDegreesV[v],
                                                              replace=True, p=Yt[v])

    # Pair up the U-side and V-side endpoints block by block.
    ticker = 0
    for u in range(K1):
        for v in range(K2):
            if tabUV[u, v] > 0:
                elist[1, ticker:ticker + tabUV[u, v]] = eitmp[tickerV[v]:tickerV[v] + tabUV[u, v]]
                ticker += tabUV[u, v]

    elist = elist.T

    if not selfLoops:
        elist = elist[elist[:, 0] != elist[:, 1]]

    if not directed:
        if n != d:
            raise Exception("{0} != {1}: Undirected network requires n == d".format(n, d))
        # Mirror every edge so the resulting adjacency matrix is symmetric.
        elist = np.concatenate((elist, elist[:, ::-1]))

    if not PoissonEdges:
        # Collapse multi-edges. The original structured-dtype view used
        # np.int, which was removed from NumPy; row-wise unique is the
        # supported equivalent.
        elist = np.unique(elist, axis=0)

    if returnEdgeList:
        return elist

    A = csc_matrix((np.ones(elist.shape[0], dtype=int),
                    (elist[:, 0], elist[:, 1])),
                   shape=(n, d), dtype=int)

    return (A, X, S, Y if returnY else None) if returnParameters else A

0 comments on commit b5f102a

Please sign in to comment.