Skip to content

Commit

Permalink
Add fastRG.py
Browse files Browse the repository at this point in the history
  • Loading branch information
Yun-Jhong Wu committed Mar 18, 2017
1 parent c971dae commit b5f102a
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 1 deletion.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# Matrix routines

- `ccnn.py`: Zhang, Y., Liang, P., & Wainwright, M. J. (2016). Convexified convolutional neural networks. *arXiv preprint* arXiv:1609.01000.
- `cur.py`: Mahoney, M. W., & Drineas, P. (2009). CUR matrix decompositions for improved data analysis. *Proceedings of the National Academy of Sciences*, 106(3), 697-702.
- `fastRG.py`: Rohe, K., Tao, J., Han, X., & Binkiewicz, N. (2017). A note on quickly sampling a sparse matrix with low rank expectation. *arXiv preprint* arXiv:1703.02998.
- `linear_time_svd`: Drineas, P., Kannan, R., & Mahoney, M. W. (2006). Fast Monte Carlo algorithms for matrices II: Computing a low-rank approximation to a matrix. *SIAM Journal on computing*, 36(1), 158-183.
- `matrix_completion.py`: Lin, Z., Chen, M., & Ma, Y. (2010). The augmented lagrange multiplier method for exact recovery of corrupted low-rank matrices. *arXiv preprint* arXiv:1009.5055.
- `neighborhood_smoothing.py`: Zhang, Y., Levina, E. and Zhu, J. (2016) Estimating neighborhood edge probabilities by neighborhood smoothing. *arXiv preprint* arXiv:1509.08588.
- `parafac2.py`: Kiers, H. A., Ten Berge, J. M., & Bro, R. (1999). PARAFAC2-Part I. A direct fitting algorithm for the PARAFAC2 model. *Journal of Chemometrics*, 13(3-4), 275-294.
- `robust_pca.py`: Wright, J., Ganesh, A., Rao, S., Peng, Y., & Ma, Y. (2009). Robust principal component analysis: Exact recovery of corrupted low-rank matrices via convex optimization. *Advances in neural information processing systems*, 2080-2088.
- `randomized_svd.java`: Halko, N., Martinsson, P. G., & Tropp, J. A. (2011). Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions. *SIAM review*, 53(2), 217-288.
- `regularized_matrix_regression.py`: Zhou, H., & Li, L. (2014). Regularized matrix regression. *Journal of the Royal Statistical Society: Series B (Statistical Methodology)*, 76(2), 463-483.
- `ccnn.py`: Zhang, Y., Liang, P., & Wainwright, M. J. (2016). Convexified convolutional neural networks. *arXiv preprint* arXiv:1609.01000.

130 changes: 130 additions & 0 deletions fastRG.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 17 15:18:34 2017
Author: Yun-Jhong Wu
E-mail: [email protected]
"""

import numpy as np
from scipy.sparse import csc_matrix

def howManyEdges(X, S, Y=None):
    """
    Expected edge count and expected average degree for a fastRG model.

    With column sums Cx = colsum(X) and Cy = colsum(Y), the expected number
    of edges is Cx @ S @ Cy; the average degree divides that by the number
    of rows of X. When Y is omitted the graph is square and Y = X.
    """
    if Y is None:
        Y = X

    col_sums_x = np.sum(X, axis=0)
    col_sums_y = np.sum(Y, axis=0)

    # (S * col_sums_y) scales column j of S by Cy[j]; row-summing gives S @ Cy.
    expected_edges = col_sums_x @ np.sum(S * col_sums_y, axis=1)
    expected_avg_degree = expected_edges / X.shape[0]

    return expected_edges, expected_avg_degree


def fastRG(X, S, Y=None, avgDeg=None, simple=None, PoissonEdges=True,
           directed=False, selfLoops=False, returnEdgeList=False,
           returnParameters=False):
    """
    Sample a sparse random graph whose expected adjacency matrix is
    proportional to X @ S @ Y.T.

    Rohe, K., Tao, J., Han, X., & Binkiewicz, N. (2017). A note on quickly
    sampling a sparse matrix with low rank expectation. arXiv preprint
    arXiv:1703.02998.
    Python port of the reference R implementation:
    https://github.com/karlrohe/fastRG

    Parameters
    ----------
    X : (n, K1) nonnegative array.
    S : (K1, K2) nonnegative block-intensity array. Never modified in place.
    Y : (d, K2) nonnegative array or None. Supplying Y samples a bipartite
        (rectangular) graph and forces directed=True, selfLoops=True,
        simple=False.
    avgDeg : if given, S is rescaled (on a copy) so the expected average
        degree equals avgDeg.
    simple : if truthy, force an undirected simple graph (no self-loops,
        no multi-edges).
    PoissonEdges : if False, collapse duplicate edges to single edges.
    directed, selfLoops : graph flavor switches.
    returnEdgeList : return the (m, 2) integer edge array instead of a
        sparse matrix.
    returnParameters : return (A, X, S, Y) instead of A alone.

    Returns
    -------
    scipy.sparse.csc_matrix of shape (n, d), an edge list, or a tuple as
    described above; None if any entry of X, S, or Y is negative.
    """

    if Y is not None and Y.size > 0:
        # A second factor makes the graph bipartite/directed by construction.
        directed = True
        selfLoops = True
        simple = False
        returnY = True
    else:
        Y = X
        returnY = False

    if np.any(X < 0) or np.any(S < 0) or np.any(Y < 0):
        return None

    if simple:  # None and False are both falsy: same behavior, simpler test
        selfLoops = False
        directed = False
        PoissonEdges = False

    n, K1 = X.shape
    d, K2 = Y.shape

    if avgDeg is not None:
        _, eDbar = howManyEdges(X, S, Y)
        # Rescale into a NEW array: `S *= ...` would mutate the caller's S
        # and would fail outright for integer-dtype S.
        S = S * (avgDeg / eDbar)

    if not directed:
        S = (S + S.T) * 0.25

    Cx = np.sum(X, axis=0)
    Cy = np.sum(Y, axis=0)
    # Column-normalize: row u of Xt / row v of Yt is a probability vector
    # over the n (resp. d) nodes. Assumes no all-zero column — TODO confirm.
    Xt = (X * (1 / Cx)).T
    Yt = (Y * (1 / Cy)).T

    St = Cx[:, None] * S * Cy
    m = np.random.poisson(np.sum(St))

    if m == 0:
        if returnEdgeList:
            # Keep the contract consistent: an edge list was requested,
            # so return an empty one, not a sparse matrix.
            return np.empty((0, 2), dtype=np.int64)
        A = csc_matrix((n, d))
        return (A, X, S, Y if returnY else None) if returnParameters else A

    # Number of sampled edges landing in each (u, v) block pair.
    tabUV = np.random.multinomial(m, pvals=St.ravel() * (1 / np.sum(St))).reshape((K1, K2))

    # Integer buffers: these hold node indices fed to csc_matrix.
    elist = np.empty((2, m), dtype=np.int64)
    eitmp = np.empty(m, dtype=np.int64)

    blockDegreesU = np.sum(tabUV, axis=1)
    tickerU = np.insert(np.cumsum(blockDegreesU), 0, 0)

    for u in range(K1):
        if blockDegreesU[u] > 0:
            elist[0, tickerU[u]:tickerU[u+1]] = np.random.choice(n, size=blockDegreesU[u],
                                                                 replace=True, p=Xt[u])

    blockDegreesV = np.sum(tabUV, axis=0)
    tickerV = np.insert(np.cumsum(blockDegreesV), 0, 0)

    for v in range(K2):
        if blockDegreesV[v] > 0:
            # Column endpoints live in [0, d), not [0, n): Yt[v] has length d,
            # so sampling from n would crash whenever n != d.
            eitmp[tickerV[v]:tickerV[v+1]] = np.random.choice(d, size=blockDegreesV[v],
                                                              replace=True, p=Yt[v])

    # Pair up the U-side and V-side endpoints block by block.
    ticker = 0
    for u in range(K1):
        for v in range(K2):
            if tabUV[u, v] > 0:
                elist[1, ticker:ticker + tabUV[u, v]] = eitmp[tickerV[v]:tickerV[v] + tabUV[u, v]]
                ticker += tabUV[u, v]

    elist = elist.T

    if not selfLoops:
        elist = elist[elist[:, 0] != elist[:, 1]]

    if not directed:
        if n != d:
            raise Exception("{0} != {1}: Undirected network requires n == d".format(n, d))
        # Mirror every edge so the resulting adjacency matrix is symmetric.
        elist = np.concatenate((elist, elist[:, ::-1]))

    if not PoissonEdges:
        # Collapse multi-edges. The original structured-dtype view used
        # np.int, which was removed from NumPy; row-wise unique is the
        # supported equivalent.
        elist = np.unique(elist, axis=0)

    if returnEdgeList:
        return elist

    A = csc_matrix((np.ones(elist.shape[0], dtype=int),
                    (elist[:, 0], elist[:, 1])),
                   shape=(n, d), dtype=int)

    return (A, X, S, Y if returnY else None) if returnParameters else A

0 comments on commit b5f102a

Please sign in to comment.