Skip to content

Commit

Permalink
Add SUR to GeoDaSpace GeoDaCenter#3
Browse files Browse the repository at this point in the history
only SUR codes were added and merged manually into GeoDaSpace: SUR and new spreg code have dependencies on new libpysal, so simple Copy-n-past the new spreg module to GeoDaSpace doesn't work.
  • Loading branch information
lixun910 committed Oct 15, 2018
1 parent b519dcb commit 27892dc
Show file tree
Hide file tree
Showing 14 changed files with 4,004 additions and 43 deletions.
4 changes: 4 additions & 0 deletions econometrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,7 @@
from ml_lag_regimes import *
from ml_error import *
from ml_error_regimes import *
from sur import *
from sur_utils import *
from sur_error import *
from sur_lag import *
294 changes: 294 additions & 0 deletions econometrics/diagnostics_sur.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
"""
Diagnostics for SUR and 3SLS estimation
"""

__author__= "Luc Anselin [email protected], \
Pedro V. Amaral [email protected] \
Tony Aburaad [email protected]"


import numpy as np
import scipy.stats as stats
import numpy.linalg as la
from .sur_utils import sur_dict2mat,sur_mat2dict,sur_corr,spdot
from .regimes import buildR1var,wald_test


__all__ = ['sur_setp','sur_lrtest','sur_lmtest','lam_setp','surLMe','surLMlag']


def sur_setp(bigB,varb):
''' Utility to compute standard error, t and p-value
Parameters
----------
bigB : dictionary of regression coefficient estimates,
one vector by equation
varb : variance-covariance matrix of coefficients
Returns
-------
surinfdict : dictionary with standard error, t-value, and
p-value array, one for each equation
'''
vvb = varb.diagonal()
n_eq = len(bigB.keys())
bigK = np.zeros((n_eq,1),dtype=np.int_)
for r in range(n_eq):
bigK[r] = bigB[r].shape[0]
b = sur_dict2mat(bigB)
se = np.sqrt(vvb)
se.resize(len(se),1)
t = np.divide(b,se)
tp = stats.norm.sf(abs(t))*2
surinf = np.hstack((se,t,tp))
surinfdict = sur_mat2dict(surinf,bigK)
return surinfdict

def lam_setp(lam,vm):
"""Standard errors, t-test and p-value for lambda in SUR Error ML
Parameters
----------
lam : n_eq x 1 array with ML estimates for spatial error
autoregressive coefficient
vm : n_eq x n_eq subset of variance-covariance matrix for
lambda and Sigma in SUR Error ML
(needs to be subset from full vm)
Returns
-------
: tuple with arrays for standard error, t-value and p-value
(each element in the tuple is an n_eq x 1 array)
"""
vvb = vm.diagonal()
se = np.sqrt(vvb)
se.resize(len(se),1)
t = np.divide(lam,se)
tp = stats.norm.sf(abs(t))*2
return (se,t,tp)

def sur_lrtest(n,n_eq,ldetS0,ldetS1):
''' Likelihood Ratio test on off-diagonal elements of Sigma
Parameters
----------
n : cross-sectional dimension (number of observations for an equation)
n_eq : number of equations
ldetS0 : log determinant of Sigma for OLS case
ldetS1 : log determinant of Sigma for SUR case (should be iterated)
Returns
-------
(lrtest,M,pvalue) : tupel with value of test statistic (lrtest),
degrees of freedom (M, as an integer)
p-value
'''
M = n_eq * (n_eq - 1)/2.0
lrtest = n * (ldetS0 - ldetS1)
pvalue = stats.chi2.sf(lrtest,M)
return (lrtest,int(M),pvalue)


def sur_lmtest(n,n_eq,sig):
''' Lagrange Multiplier test on off-diagonal elements of Sigma
Parameters
----------
n : cross-sectional dimension (number of observations for an equation)
n_eq : number of equations
sig : inter-equation covariance matrix for null model (OLS)
Returns
-------
(lmtest,M,pvalue) : tupel with value of test statistic (lmtest),
degrees of freedom (M, as an integer)
p-value
'''
R = sur_corr(sig)
tr = np.trace(np.dot(R.T,R))
M = n_eq * (n_eq - 1)/2.0
lmtest = (n/2.0) * (tr - n_eq)
pvalue = stats.chi2.sf(lmtest,M)
return (lmtest,int(M),pvalue)


def surLMe(n_eq,WS,bigE,sig):
"""Lagrange Multiplier test on error spatial autocorrelation in SUR
Parameters
----------
n_eq : number of equations
WS : spatial weights matrix in sparse form
bigE : n x n_eq matrix of residuals by equation
sig : cross-equation error covariance matrix
Returns
-------
(LMe,n_eq,pvalue) : tupel with value of statistic (LMe), degrees
of freedom (n_eq) and p-value
"""
# spatially lagged residuals
WbigE = WS * bigE
# score
EWE = np.dot(bigE.T,WbigE)
sigi = la.inv(sig)
SEWE = sigi * EWE
#score = SEWE.sum(axis=1)
#score.resize(n_eq,1)
# note score is column sum of Sig_i * E'WE, a 1 by n_eq row vector
# previously stored as column
score = SEWE.sum(axis=0)
score.resize(1,n_eq)


# trace terms
WW = WS * WS
trWW = np.sum(WW.diagonal())
WTW = WS.T * WS
trWtW = np.sum(WTW.diagonal())
# denominator
SiS = sigi * sig
Tii = trWW * np.identity(n_eq)
tSiS = trWtW * SiS
denom = Tii + tSiS
idenom = la.inv(denom)
# test statistic
#LMe = np.dot(np.dot(score.T,idenom),score)[0][0]
# score is now row vector
LMe = np.dot(np.dot(score,idenom),score.T)[0][0]
pvalue = stats.chi2.sf(LMe,n_eq)
return (LMe,n_eq,pvalue)

def surLMlag(n_eq,WS,bigy,bigX,bigE,bigYP,sig,varb):
"""Lagrange Multiplier test on lag spatial autocorrelation in SUR
Parameters
----------
n_eq : number of equations
WS : spatial weights matrix in sparse form
bigy : dictionary with y values
bigX : dictionary with X values
bigE : n x n_eq matrix of residuals by equation
bigYP : n x n_eq matrix of predicted values by equation
sig : cross-equation error covariance matrix
varb : variance-covariance matrix for b coefficients (inverse of Ibb)
Returns
-------
(LMlag,n_eq,pvalue) : tupel with value of statistic (LMlag), degrees
of freedom (n_eq) and p-value
"""
# Score
Y = np.hstack((bigy[r]) for r in range(n_eq))
WY = WS * Y
EWY = np.dot(bigE.T,WY)
sigi = la.inv(sig)
SEWE = sigi * EWY
score = SEWE.sum(axis=0) # column sums
score.resize(1,n_eq) # score as a row vector

# I(rho,rho) as partitioned inverse, eq 72
# trace terms
WW = WS * WS
trWW = np.sum(WW.diagonal()) #T1
WTW = WS.T * WS
trWtW = np.sum(WTW.diagonal()) #T2

# I(rho,rho)
SiS = sigi * sig
Tii = trWW * np.identity(n_eq) #T1It
tSiS = trWtW * SiS
firstHalf = Tii + tSiS
WbigYP = WS * bigYP
inner = np.dot(WbigYP.T, WbigYP)
secondHalf= sigi * inner
Ipp= firstHalf + secondHalf #eq. 75

# I(b,b) inverse is varb

# I(b,rho)
bp = sigi[0,] * spdot(bigX[0].T,WbigYP) #initialize
for r in range(1,n_eq):
bpwork = sigi[r,] * spdot(bigX[r].T,WbigYP)
bp = np.vstack((bp,bpwork))
# partitioned part
i_inner = Ipp - np.dot(np.dot(bp.T, varb), bp)
# partitioned inverse of information matrix
Ippi = la.inv(i_inner)

# test statistic
LMlag = np.dot(np.dot(score,Ippi),score.T)[0][0]
# p-value
pvalue = stats.chi2.sf(LMlag,n_eq)
return (LMlag,n_eq,pvalue)

def sur_chow(n_eq,bigK,bSUR,varb):
"""test on constancy of regression coefficients across equations in
a SUR specification
Note: requires a previous check on constancy of number of coefficients
across equations; no other checks are carried out, so it is possible
that the results are meaningless if the variables are not listed in
the same order in each equation.
Parameters
----------
n_eq : integer, number of equations
bigK : array with the number of variables by equation (includes constant)
bSUR : dictionary with the SUR regression coefficients by equation
varb : array with the variance-covariance matrix for the SUR regression
coefficients
Returns
-------
test : a list with for each coefficient (in order) a tuple with the
value of the test statistic, the degrees of freedom, and the
p-value
"""
kr = bigK[0][0]
test = []
bb = sur_dict2mat(bSUR)
kf = 0
nr = n_eq
df = n_eq - 1
for i in range(kr):
Ri = buildR1var(i,kr,kf,0,nr)
tt,p = wald_test(bb,Ri,np.zeros((df,1)),varb)
test.append((tt,df,p))
return test

def sur_joinrho(n_eq,bigK,bSUR,varb):
"""Test on joint significance of spatial autoregressive coefficient in SUR
Parameters
----------
n_eq : integer, number of equations
bigK : n_eq x 1 array with number of variables by equation
(includes constant term, exogenous and endogeneous and
spatial lag)
bSUR : dictionary with regression coefficients by equation, with
the spatial autoregressive term as last
varb : variance-covariance matrix for regression coefficients
Returns
-------
: tuple with test statistic, degrees of freedom, p-value
"""
bb = sur_dict2mat(bSUR)
R = np.zeros((n_eq,varb.shape[0]))
q = np.zeros((n_eq,1))
kc = -1
for i in range(n_eq):
kc = kc + bigK[i]
R[i,kc] = 1
w,p = wald_test(bb,R,q,varb)
return (w,n_eq,p)
22 changes: 22 additions & 0 deletions econometrics/ml_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,28 @@ def err_c_loglik(lam, n, y, ylag, x, xlag, W):
clik = nlsig2 - jacob
return clik

def err_c_loglik_sp(lam, n, y, ylag, x, xlag, I, Wsp):
# concentrated log-lik for error model, no constants, LU
if isinstance(lam, np.ndarray):
if lam.shape == (1,1):
lam = lam[0][0] #why does the interior value change?
ys = y - lam * ylag
xs = x - lam * xlag
ysys = np.dot(ys.T, ys)
xsxs = np.dot(xs.T, xs)
xsxsi = np.linalg.inv(xsxs)
xsys = np.dot(xs.T, ys)
x1 = np.dot(xsxsi, xsys)
x2 = np.dot(xsys.T, x1)
ee = ysys - x2
sig2 = ee[0][0] / n
nlsig2 = (n / 2.0) * np.log(sig2)
a = I - lam * Wsp
LU = SuperLU(a.tocsc())
jacob = np.sum(np.log(np.abs(LU.U.diagonal())))
# this is the negative of the concentrated log lik for minimization
clik = nlsig2 - jacob
return clik

def err_c_loglik_ord(lam, n, y, ylag, x, xlag, evals):
# concentrated log-lik for error model, no constants, brute force
Expand Down
44 changes: 23 additions & 21 deletions econometrics/regimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,27 +75,29 @@ class Chow:

def __init__(self, reg):
kr, kf, kryd, nr, betas, vm = reg.kr, reg.kf, reg.kryd, reg.nr, reg.betas, reg.vm
if betas.shape[0] != vm.shape[0]:
if kf > 0:
betas = betas[0:vm.shape[0], :]
kf = kf - 1
else:
brange = []
for i in range(nr):
brange.extend(range(i * (kr + 1), i * (kr + 1) + kr))
betas = betas[brange, :]
r_global = []
regi = np.zeros((reg.kr, 2))
for vari in np.arange(kr):
r_vari = buildR1var(vari, kr, kf, kryd, nr)
r_global.append(r_vari)
q = np.zeros((r_vari.shape[0], 1))
regi[vari, :] = wald_test(betas, r_vari, q, vm)
r_global = np.vstack(tuple(r_global))
q = np.zeros((r_global.shape[0], 1))
joint = wald_test(betas, r_global, q, vm)
self.joint = joint
self.regi = regi
self.joint, self.regi = _chow_run(reg.kr, reg.kf, reg.kryd, reg.nr, reg.betas, reg.vm)

def _chow_run(kr, kf, kryd, nr, betas, vm):
if betas.shape[0] != vm.shape[0]:
if kf > 0:
betas = betas[0:vm.shape[0], :]
kf = kf - 1
else:
brange = []
for i in range(nr):
brange.extend(list(range(i * (kr + 1), i * (kr + 1) + kr)))
betas = betas[brange, :]
r_global = []
regi = np.zeros((kr, 2))
for vari in np.arange(kr):
r_vari = buildR1var(vari, kr, kf, kryd, nr)
r_global.append(r_vari)
q = np.zeros((r_vari.shape[0], 1))
regi[vari, :] = wald_test(betas, r_vari, q, vm)
r_global = np.vstack(tuple(r_global))
q = np.zeros((r_global.shape[0], 1))
joint = wald_test(betas, r_global, q, vm)
return joint,regi


class Wald:
Expand Down
Loading

0 comments on commit 27892dc

Please sign in to comment.