Add SUR to GeoDaSpace GeoDaCenter#3

only SUR codes were added and merged manually into GeoDaSpace: SUR and new spreg code have dependencies on new libpysal, so simple Copy-n-past the new spreg module to GeoDaSpace doesn't work.
lixun910 · Oct 15, 2018 · 27892dc · 27892dc
1 parent b519dcb
commit 27892dc
Show file tree

Hide file tree

Showing 14 changed files with 4,004 additions and 43 deletions.
diff --git a/econometrics/__init__.py b/econometrics/__init__.py
@@ -18,3 +18,7 @@
 from ml_lag_regimes import *
 from ml_error import *
 from ml_error_regimes import *
+from sur import *
+from sur_utils import *
+from sur_error import *
+from sur_lag import *
diff --git a/econometrics/diagnostics_sur.py b/econometrics/diagnostics_sur.py
@@ -0,0 +1,294 @@
+"""
+Diagnostics for SUR and 3SLS estimation
+"""
+
+__author__= "Luc Anselin [email protected],    \
+             Pedro V. Amaral [email protected]  \
+             Tony Aburaad [email protected]"
+
+
+import numpy as np
+import scipy.stats as stats
+import numpy.linalg as la
+from .sur_utils import sur_dict2mat,sur_mat2dict,sur_corr,spdot
+from .regimes import buildR1var,wald_test
+
+
+__all__ = ['sur_setp','sur_lrtest','sur_lmtest','lam_setp','surLMe','surLMlag']
+
+
+def sur_setp(bigB,varb):
+    ''' Utility to compute standard error, t and p-value
+
+    Parameters
+    ----------
+    bigB    : dictionary of regression coefficient estimates,
+              one vector by equation
+    varb    : variance-covariance matrix of coefficients
+
+    Returns
+    -------
+    surinfdict : dictionary with standard error, t-value, and
+                 p-value array, one for each equation
+
+    '''
+    vvb = varb.diagonal()
+    n_eq = len(bigB.keys())
+    bigK = np.zeros((n_eq,1),dtype=np.int_)
+    for r in range(n_eq):
+        bigK[r] = bigB[r].shape[0]
+    b = sur_dict2mat(bigB)
+    se = np.sqrt(vvb)
+    se.resize(len(se),1)
+    t = np.divide(b,se)
+    tp = stats.norm.sf(abs(t))*2
+    surinf = np.hstack((se,t,tp))
+    surinfdict = sur_mat2dict(surinf,bigK)
+    return surinfdict
+
+def lam_setp(lam,vm):
+    """Standard errors, t-test and p-value for lambda in SUR Error ML
+
+    Parameters
+    ----------
+    lam        : n_eq x 1 array with ML estimates for spatial error
+                 autoregressive coefficient
+    vm         : n_eq x n_eq subset of variance-covariance matrix for
+                 lambda and Sigma in SUR Error ML
+                 (needs to be subset from full vm)
+
+    Returns
+    -------
+               : tuple with arrays for standard error, t-value and p-value
+                 (each element in the tuple is an n_eq x 1 array)
+
+    """
+    vvb = vm.diagonal()
+    se = np.sqrt(vvb)
+    se.resize(len(se),1)
+    t = np.divide(lam,se)
+    tp = stats.norm.sf(abs(t))*2
+    return (se,t,tp)
+
+def sur_lrtest(n,n_eq,ldetS0,ldetS1):
+    ''' Likelihood Ratio test on off-diagonal elements of Sigma
+
+        Parameters
+        ----------
+        n        : cross-sectional dimension (number of observations for an equation)
+        n_eq     : number of equations
+        ldetS0   : log determinant of Sigma for OLS case
+        ldetS1   : log determinant of Sigma for SUR case (should be iterated)
+
+        Returns
+        -------
+        (lrtest,M,pvalue) : tupel with value of test statistic (lrtest),
+                            degrees of freedom (M, as an integer)
+                            p-value
+
+    '''
+    M = n_eq * (n_eq - 1)/2.0
+    lrtest = n * (ldetS0 - ldetS1)
+    pvalue = stats.chi2.sf(lrtest,M)
+    return (lrtest,int(M),pvalue)
+
+
+def sur_lmtest(n,n_eq,sig):
+    ''' Lagrange Multiplier test on off-diagonal elements of Sigma
+
+        Parameters
+        ----------
+        n        : cross-sectional dimension (number of observations for an equation)
+        n_eq     : number of equations
+        sig      : inter-equation covariance matrix for null model (OLS)
+
+        Returns
+        -------
+        (lmtest,M,pvalue) : tupel with value of test statistic (lmtest),
+                            degrees of freedom (M, as an integer)
+                            p-value
+
+    '''
+    R = sur_corr(sig)
+    tr = np.trace(np.dot(R.T,R))
+    M = n_eq * (n_eq - 1)/2.0
+    lmtest = (n/2.0) * (tr - n_eq)
+    pvalue = stats.chi2.sf(lmtest,M)
+    return (lmtest,int(M),pvalue)
+
+
+def surLMe(n_eq,WS,bigE,sig):
+    """Lagrange Multiplier test on error spatial autocorrelation in SUR
+
+    Parameters
+    ----------
+    n_eq       : number of equations
+    WS         : spatial weights matrix in sparse form
+    bigE       : n x n_eq matrix of residuals by equation
+    sig        : cross-equation error covariance matrix
+
+    Returns
+    -------
+    (LMe,n_eq,pvalue) : tupel with value of statistic (LMe), degrees
+                        of freedom (n_eq) and p-value
+
+    """
+    # spatially lagged residuals
+    WbigE = WS * bigE
+    # score
+    EWE = np.dot(bigE.T,WbigE)
+    sigi = la.inv(sig)
+    SEWE = sigi * EWE
+    #score = SEWE.sum(axis=1)
+    #score.resize(n_eq,1)
+    # note score is column sum of Sig_i * E'WE, a 1 by n_eq row vector
+    # previously stored as column
+    score = SEWE.sum(axis=0)
+    score.resize(1,n_eq)
+
+
+    # trace terms
+    WW = WS * WS
+    trWW = np.sum(WW.diagonal())
+    WTW = WS.T * WS
+    trWtW = np.sum(WTW.diagonal())
+    # denominator
+    SiS = sigi * sig
+    Tii = trWW * np.identity(n_eq)
+    tSiS = trWtW * SiS
+    denom = Tii + tSiS
+    idenom = la.inv(denom)
+    # test statistic
+    #LMe = np.dot(np.dot(score.T,idenom),score)[0][0]
+    # score is now row vector
+    LMe = np.dot(np.dot(score,idenom),score.T)[0][0]
+    pvalue = stats.chi2.sf(LMe,n_eq)
+    return (LMe,n_eq,pvalue)
+
+def surLMlag(n_eq,WS,bigy,bigX,bigE,bigYP,sig,varb):
+    """Lagrange Multiplier test on lag spatial autocorrelation in SUR
+
+    Parameters
+    ----------
+    n_eq       : number of equations
+    WS         : spatial weights matrix in sparse form
+    bigy       : dictionary with y values
+    bigX       : dictionary with X values
+    bigE       : n x n_eq matrix of residuals by equation
+    bigYP      : n x n_eq matrix of predicted values by equation
+    sig        : cross-equation error covariance matrix
+    varb       : variance-covariance matrix for b coefficients (inverse of Ibb)
+
+    Returns
+    -------
+    (LMlag,n_eq,pvalue) : tupel with value of statistic (LMlag), degrees
+                          of freedom (n_eq) and p-value
+
+    """
+    # Score
+    Y = np.hstack((bigy[r]) for r in range(n_eq))
+    WY = WS * Y
+    EWY = np.dot(bigE.T,WY)
+    sigi = la.inv(sig)
+    SEWE = sigi * EWY
+    score = SEWE.sum(axis=0)  # column sums
+    score.resize(1,n_eq)  # score as a row vector
+
+    # I(rho,rho) as partitioned inverse, eq 72
+    # trace terms
+    WW = WS * WS
+    trWW = np.sum(WW.diagonal()) #T1
+    WTW = WS.T * WS
+    trWtW = np.sum(WTW.diagonal()) #T2
+
+    # I(rho,rho)
+    SiS = sigi * sig
+    Tii = trWW * np.identity(n_eq) #T1It
+    tSiS = trWtW * SiS
+    firstHalf = Tii + tSiS
+    WbigYP = WS * bigYP
+    inner = np.dot(WbigYP.T, WbigYP)
+    secondHalf= sigi * inner
+    Ipp= firstHalf + secondHalf #eq. 75
+
+    # I(b,b) inverse is varb
+
+    # I(b,rho)
+    bp = sigi[0,] * spdot(bigX[0].T,WbigYP)  #initialize
+    for r in range(1,n_eq):
+        bpwork = sigi[r,] * spdot(bigX[r].T,WbigYP)
+        bp = np.vstack((bp,bpwork))
+    # partitioned part    
+    i_inner = Ipp - np.dot(np.dot(bp.T, varb), bp)
+    # partitioned inverse of information matrix
+    Ippi = la.inv(i_inner)
+
+    # test statistic
+    LMlag = np.dot(np.dot(score,Ippi),score.T)[0][0]
+    # p-value
+    pvalue = stats.chi2.sf(LMlag,n_eq)
+    return (LMlag,n_eq,pvalue)
+
+def sur_chow(n_eq,bigK,bSUR,varb):
+    """test on constancy of regression coefficients across equations in
+       a SUR specification
+
+       Note: requires a previous check on constancy of number of coefficients
+             across equations; no other checks are carried out, so it is possible
+             that the results are meaningless if the variables are not listed in
+             the same order in each equation.
+
+       Parameters
+       ----------
+       n_eq       : integer, number of equations
+       bigK       : array with the number of variables by equation (includes constant)
+       bSUR       : dictionary with the SUR regression coefficients by equation
+       varb       : array with the variance-covariance matrix for the SUR regression
+                    coefficients
+
+       Returns
+       -------
+       test       : a list with for each coefficient (in order) a tuple with the
+                    value of the test statistic, the degrees of freedom, and the
+                    p-value
+
+    """
+    kr = bigK[0][0]
+    test = []
+    bb = sur_dict2mat(bSUR)
+    kf = 0
+    nr = n_eq
+    df = n_eq - 1
+    for i in range(kr):
+        Ri = buildR1var(i,kr,kf,0,nr)
+        tt,p = wald_test(bb,Ri,np.zeros((df,1)),varb)
+        test.append((tt,df,p))
+    return test
+
+def sur_joinrho(n_eq,bigK,bSUR,varb):
+    """Test on joint significance of spatial autoregressive coefficient in SUR
+
+       Parameters
+       ----------
+       n_eq       : integer, number of equations
+       bigK       : n_eq x 1 array with number of variables by equation
+                    (includes constant term, exogenous and endogeneous and
+                    spatial lag)
+       bSUR       : dictionary with regression coefficients by equation, with
+                    the spatial autoregressive term as last
+       varb       : variance-covariance matrix for regression coefficients
+
+       Returns
+       -------
+                  : tuple with test statistic, degrees of freedom, p-value
+
+    """
+    bb = sur_dict2mat(bSUR)
+    R = np.zeros((n_eq,varb.shape[0]))
+    q = np.zeros((n_eq,1))
+    kc = -1
+    for i in range(n_eq):
+        kc = kc + bigK[i]
+        R[i,kc] = 1
+    w,p = wald_test(bb,R,q,varb)
+    return (w,n_eq,p)
diff --git a/econometrics/ml_error.py b/econometrics/ml_error.py
@@ -480,6 +480,28 @@ def err_c_loglik(lam, n, y, ylag, x, xlag, W):
     clik = nlsig2 - jacob
     return clik
 
+def err_c_loglik_sp(lam, n, y, ylag, x, xlag, I, Wsp):
+    # concentrated log-lik for error model, no constants, LU
+    if isinstance(lam, np.ndarray):
+        if lam.shape == (1,1):
+            lam = lam[0][0] #why does the interior value change?
+    ys = y - lam * ylag
+    xs = x - lam * xlag
+    ysys = np.dot(ys.T, ys)
+    xsxs = np.dot(xs.T, xs)
+    xsxsi = np.linalg.inv(xsxs)
+    xsys = np.dot(xs.T, ys)
+    x1 = np.dot(xsxsi, xsys)
+    x2 = np.dot(xsys.T, x1)
+    ee = ysys - x2
+    sig2 = ee[0][0] / n
+    nlsig2 = (n / 2.0) * np.log(sig2)
+    a = I - lam * Wsp
+    LU = SuperLU(a.tocsc())
+    jacob = np.sum(np.log(np.abs(LU.U.diagonal()))) 
+    # this is the negative of the concentrated log lik for minimization
+    clik = nlsig2 - jacob
+    return clik
 
 def err_c_loglik_ord(lam, n, y, ylag, x, xlag, evals):
     # concentrated log-lik for error model, no constants, brute force

diff --git a/econometrics/regimes.py b/econometrics/regimes.py
@@ -75,27 +75,29 @@ class Chow:
 
     def __init__(self, reg):
         kr, kf, kryd, nr, betas, vm = reg.kr, reg.kf, reg.kryd, reg.nr, reg.betas, reg.vm
-        if betas.shape[0] != vm.shape[0]:
-            if kf > 0:
-                betas = betas[0:vm.shape[0], :]
-                kf = kf - 1
-            else:
-                brange = []
-                for i in range(nr):
-                    brange.extend(range(i * (kr + 1), i * (kr + 1) + kr))
-                betas = betas[brange, :]
-        r_global = []
-        regi = np.zeros((reg.kr, 2))
-        for vari in np.arange(kr):
-            r_vari = buildR1var(vari, kr, kf, kryd, nr)
-            r_global.append(r_vari)
-            q = np.zeros((r_vari.shape[0], 1))
-            regi[vari, :] = wald_test(betas, r_vari, q, vm)
-        r_global = np.vstack(tuple(r_global))
-        q = np.zeros((r_global.shape[0], 1))
-        joint = wald_test(betas, r_global, q, vm)
-        self.joint = joint
-        self.regi = regi
+        self.joint, self.regi = _chow_run(reg.kr, reg.kf, reg.kryd, reg.nr, reg.betas, reg.vm)
+
+def _chow_run(kr, kf, kryd, nr, betas, vm):
+    if betas.shape[0] != vm.shape[0]:
+        if kf > 0:
+            betas = betas[0:vm.shape[0], :]
+            kf = kf - 1
+        else:
+            brange = []
+            for i in range(nr):
+                brange.extend(list(range(i * (kr + 1), i * (kr + 1) + kr)))
+            betas = betas[brange, :]
+    r_global = []
+    regi = np.zeros((kr, 2))
+    for vari in np.arange(kr):
+        r_vari = buildR1var(vari, kr, kf, kryd, nr)
+        r_global.append(r_vari)
+        q = np.zeros((r_vari.shape[0], 1))
+        regi[vari, :] = wald_test(betas, r_vari, q, vm)
+    r_global = np.vstack(tuple(r_global))
+    q = np.zeros((r_global.shape[0], 1))
+    joint = wald_test(betas, r_global, q, vm)
+    return joint,regi
 
 
 class Wald: