From 7676d250f4f5ef0d770ebde8048d3afe992a4fe6 Mon Sep 17 00:00:00 2001
From: Alexander Fischer <alexander-fischer1801@t-online.de>
Date: Fri, 14 Oct 2022 18:25:34 +0200
Subject: [PATCH 1/6] some fixes in tests

---
 tests/bootalgo-tests.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/tests/bootalgo-tests.py b/tests/bootalgo-tests.py
index 7ce479c..0bcb2d5 100644
--- a/tests/bootalgo-tests.py
+++ b/tests/bootalgo-tests.py
@@ -8,7 +8,7 @@ def WCR11_not_WCU11():
   G= 20
   X = np.random.normal(0, 1, N * k).reshape((N,k))
   beta = np.random.normal(0,1,k)
-  beta[0] = 1
+  beta[0] = 0.1
   u = np.random.normal(0,1,N)
   y = 1 + X @ beta + u
   cluster = np.random.choice(list(range(0,G)), N)
@@ -22,26 +22,20 @@ def WCR11_not_WCU11():
   wcr.get_weights(weights_type = "rademacher")
   wcr.get_numer()
   wcr.get_denom()
-  wcr.numer
-  wcr.denom
   wcr.get_tboot()
-  wcr.t_boot
   wcr.get_vcov()
   wcr.get_tstat()
   wcr.get_pvalue(pval_type = "two-tailed")
   
-  wcu = Wildboottest(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341)
-  wcu.get_scores(bootstrap_type = "11", impose_null = True)
-  wcr.get_weights(weights_type = "rademacher")
+  wcu = wb.Wildboottest(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341)
+  wcu.get_scores(bootstrap_type = "11", impose_null = False)
+  wcu.get_weights(weights_type = "rademacher")
   wcu.get_numer()
   wcu.get_denom()
-  wcu.numer
-  wcu.denom
   wcu.get_tboot()
-  wcu.t_boot
   wcu.get_vcov()
   wcu.get_tstat()
-  wcu-(pval_type = "two-tailed")
+  wcu.get_pvalue(pval_type = "two-tailed")
 
   # score matrices of WCR11 and WCU11 should be different - currently not the case
   assert not np.array_equal(wcr.scores_mat, wcu.scores_mat)

From 146888a8f377a20dbc343f70fbaa2ae5384575d1 Mon Sep 17 00:00:00 2001
From: Alexander Fischer <alexander-fischer1801@t-online.de>
Date: Sat, 15 Oct 2022 12:07:09 +0200
Subject: [PATCH 2/6] add wildboottest function

---
 readme.md                                     |  51 +++++++++---
 tests/bootalgo-tests.py                       |  40 ++++-----
 .../__pycache__/wildboottest.cpython-36.pyc   | Bin 8349 -> 10459 bytes
 wildboottest/wildboottest.py                  |  77 ++++++++++++++++++
 4 files changed, 138 insertions(+), 30 deletions(-)

diff --git a/readme.md b/readme.md
index 42daabb..5d6b31e 100644
--- a/readme.md
+++ b/readme.md
@@ -12,27 +12,61 @@ If you'd like to cooperate, either send us an
 
 ## Example 
 
-Note: everything is still very much work in progress, and there are multiple errors in the code that I am aware of. Still, I believe that the implementation of the WCR11 is more or less correct.
+Note: everything is still very much work in progress. Still, I believe that the implementation of the WCR11 and WCU 11 is more or less correct.
 
 ```
-import wildboottest.wildboottest as wb
+from wildboottest.wildboottest import wildboottest, Wildboottest
+import statsmodels.api as sm
 import numpy as np
 import timeit 
 import time
 
-N = 100000
-k = 100
-G= 50
+N = 1000
+k = 10
+G= 12
 X = np.random.normal(0, 1, N * k).reshape((N,k))
 beta = np.random.normal(0,1,k)
 beta[0] = 0.005
 u = np.random.normal(0,1,N)
 Y = 1 + X @ beta + u
 cluster = np.random.choice(list(range(0,G)), N)
+B = 99999
+
+
+model = sm.OLS(Y, X)
+model.exog
+results = model.fit(cov_type = 'cluster', cov_kwds = {
+   'groups': cluster
+})
+results.summary()
+# >>> results.summary()
+# <class 'statsmodels.iolib.summary.Summary'>
+# """
+#                                  OLS Regression Results                                
+# =======================================================================================
+# Dep. Variable:                      y   R-squared (uncentered):                   0.799
+# Model:                            OLS   Adj. R-squared (uncentered):              0.797
+# Method:                 Least Squares   F-statistic:                              790.3
+# Date:                Sat, 15 Oct 2022   Prob (F-statistic):                    3.16e-14
+# Time:                        12:03:43   Log-Likelihood:                         -1784.6
+# No. Observations:                1000   AIC:                                      3589.
+# Df Residuals:                     990   BIC:                                      3638.
+# Df Model:                          10                                                  
+# Covariance Type:              cluster                                                  
+# ==============================================================================
+#                  coef    std err          z      P>|z|      [0.025      0.975]
+# ------------------------------------------------------------------------------
+# x1             0.0128      0.064      0.200      0.841      -0.113       0.138
+wildboottest(model, "X1", cluster, B)
+# 0.8408408408408409
+
+
+# execute, method by method
+
+# some preparations
 bootcluster = cluster
 R = np.zeros(k)
 R[0] = 1
-B = 99999
 
 start_time = timeit.default_timer()
 wcr = wb.Wildboottest(X = X, Y = Y, cluster = cluster, bootcluster = bootcluster, R = R, B = B, seed = 12341)
@@ -40,15 +74,12 @@ wcr.get_scores(bootstrap_type = "11", impose_null = True)
 wcr.get_weights(weights_type = "rademacher")
 wcr.get_numer()
 wcr.get_denom()
-wcr.numer
-wcr.denom
 wcr.get_tboot()
-wcr.t_boot
 wcr.get_vcov()
 wcr.get_tstat()
 wcr.get_pvalue(pval_type = "two-tailed")
 print("estimation time:", timeit.default_timer() - start_time)
 # >>> 0.9225496 seconds
 print("p value:", wcr.pvalue)
-# >>> p value: 0.15258152581525816
+# >>> p value: 0.8408408408408409
 ```
diff --git a/tests/bootalgo-tests.py b/tests/bootalgo-tests.py
index 0bcb2d5..e964fcb 100644
--- a/tests/bootalgo-tests.py
+++ b/tests/bootalgo-tests.py
@@ -17,15 +17,15 @@ def WCR11_not_WCU11():
   R[0] = 1
   B = 999
 
-  wcr = wb.Wildboottest(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341)
-  wcr.get_scores(bootstrap_type = "11", impose_null = True)
-  wcr.get_weights(weights_type = "rademacher")
-  wcr.get_numer()
-  wcr.get_denom()
-  wcr.get_tboot()
-  wcr.get_vcov()
-  wcr.get_tstat()
-  wcr.get_pvalue(pval_type = "two-tailed")
+  boot = Wildboottest(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341)
+  boot.get_scores(bootstrap_type = "11", impose_null = True)
+  boot.get_weights(weights_type = "rademacher")
+  boot.get_numer()
+  boot.get_denom()
+  boot.get_tboot()
+  boot.get_vcov()
+  boot.get_tstat()
+  boot.get_pvalue(pval_type = "two-tailed")
   
   wcu = wb.Wildboottest(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341)
   wcu.get_scores(bootstrap_type = "11", impose_null = False)
@@ -38,10 +38,10 @@ def WCR11_not_WCU11():
   wcu.get_pvalue(pval_type = "two-tailed")
 
   # score matrices of WCR11 and WCU11 should be different - currently not the case
-  assert not np.array_equal(wcr.scores_mat, wcu.scores_mat)
-  assert not np.array_equal(wcr.t_boot, wcu.t_boot)
-  assert np.array_equal(wcr.t_stat, wcu.t_stat)
-  assert not np.array_equal(wcr.pvalue, wcu.pvalue) # unless both pvals are zero or 1...
+  assert not np.array_equal(boot.scores_mat, wcu.scores_mat)
+  assert not np.array_equal(boot.t_boot, wcu.t_boot)
+  assert np.array_equal(boot.t_stat, wcu.t_stat)
+  assert not np.array_equal(boot.pvalue, wcu.pvalue) # unless both pvals are zero or 1...
 
 
 def test_r_vs_py():
@@ -74,14 +74,14 @@ def full_enum_works():
   B = 99999
   
   wcr = Wildboottest(X = X, Y = Y, cluster = cluster, bootcluster = bootcluster, R = R, B = B, seed = 12341)
-  wcr.get_scores(bootstrap_type = "11", impose_null = False)
-  wcr.get_weights(weights_type = "rademacher")
-  wcr.get_numer()
-  wcr.get_denom()
-  wcr.get_tboot()
+  boot.get_scores(bootstrap_type = "11", impose_null = False)
+  boot.get_weights(weights_type = "rademacher")
+  boot.get_numer()
+  boot.get_denom()
+  boot.get_tboot()
   
-  assert len(wcr.t_boot) == 2**G
-  assert wcr.full_enumeration == True
+  assert len(boot.t_boot) == 2**G
+  assert boot.full_enumeration == True
   
 
 
diff --git a/wildboottest/__pycache__/wildboottest.cpython-36.pyc b/wildboottest/__pycache__/wildboottest.cpython-36.pyc
index 65e375553e864b4ba255b05e6d7808442764d2e9..e39e10036a478c4cdb434fefc73017c0e8e32066 100644
GIT binary patch
delta 2812
zcmai0O>7%Q6!xrt<NP*hnxq6Wq@uQ4*RmTxv}&L!{}oE1ji7`dckG?m>#ldVv*Y}f
zH5P<Qh^pdeL8_3rao~gm0mL1N6IU+F0U-|DIUsR@_hvVBR9dn2e)HbUdvD%*GxK&n
zTl(dNLkq*h+3(*uTmSlzSnT)Mo+kzLX?Wc)KRopL=!`z4PrpM}Cv|cmRyg?8PmiX_
zGyY&^G5f0K#i~howVmfO-zG`0qmBF(C(rwT@B0JrxABR?FM*jtS{#8!yoj)ha1`Mc
zgjW$>^M9R~_K#0&A9(@U5r81!UJ+8Dx|l*fC95^tmIW=eb!q3<iBp8E`m>Wij#W`=
z24NbZzVqj#LGD{a`Vv5pa@LvI;X%A@ScQZ#5cpeD4bt>~n_8SZhl1x3UPoXE9HD`5
z#b2DhHYkvE5hU=_`__S+B9I~thzt?y{$JDA$%g;-OojCPZ)QGyVhe?@0tCbnZy<9E
z3Mmz%7{gu<p^LDDfU!W38%V$De{`@+w*4Oto;Zx=z<10|UoS{mwrDf6uP}elVRFrX
z_MU4`yoHiDzfh{_+f>z*!C>9C>P&JAUNyCHqHk_|wmAE4ICBq8Z6jPmP`ye2%G`cU
zZ2HdZ40+psZ<ft_$<~0Ln2B}cGTzF1Nj<J7;FrFU=%&1s7jI}g|IAKmB@D1Yf&|+%
zT$P!^U@*R6n0i$gY@@o?v8uXJmqEP2C1m%^+yhB52|9jb?p*Hx5V5hWmLZu`f+XVe
zi4igs&ysOXJP0EzfmW&1GbjWPW_bYK$KiFq1CX&6@yIsm5---$x*5QDcK|Ta%>pJ{
zsqUa^)7>GNX${D%H@J=R!#eSXy}?#*5Y<n`Jgqw-hrEn8;A!3n48^^)uH8s?N4?S3
zFhmQ{-_S@5xDgMsaDym%bUUWnv8de_wc}Ac5w-jEq@Id&CgJF%*_X*!EK6zce8-}U
zVzy}A?6|OY6c2|h1#@VPxm>3<NLbXawRl}pdky56>(ZPnnRMH>&P}%@ctdd4HEgS7
z8Wv}wss>8up1Y8PS>V4U8tzFXC{;uGP`&s6`h}BpiSC(<_9u&+QdS)%SR1U0NM0CL
z1HCo5)sRgHf)^=lQOOJoM`V*z$=#l$N|9sWAZ=Jca91jmQ9|;TQ({7}EoIs2w4E)g
zcvR4hHn%YFJI)YUzzNrxFj&pxC1v_d9%cf2WW|5<zWYy6X;aa$)X)Yu8cpd|<(9*v
zcvjF^U~$6!>O_Km_1^j32Cvm})D|=s9^+go%r)lwwkIc;&fBcs<RV9FTU6(3tYga1
zFQe_)F0Wc0)6AQ8!>B_=I7u%w?G9`Z%o&c*=9Xgv;~FllaSA#;P*NKOMfx7<4X21s
zS+;yk#de!35xXKfpw;&zbp4i@ekMw4C;WG1OFg<|n)Zf^d8{{W@J2a%%S4#HySo5t
z5D2V5i9TsS43Ni4C0C@m1%;J`Ft-Z}kXIbp>n&RCQI~UFt!as#b0qEuu;~bU-OxE4
zFu^R{ZY!y1WkBsbmmOj49*d>BoI(dO>l{;`oxbXmo2>1aJUUU?9%d5PPRPASrNkVA
zGMBn-6?!BP?O>N$PFD59{to>ssVFMP7u3I?2AAm~t(41U#avd5BA>=?MO~zeJ+QTF
zBiw-ba*<YwbeSHZmkUrI+$MwVoL??pE`)Y#Tr#kB2fYf&D8nf7O8GUADwoRThg4tZ
zjyk<!rMMg_y^Jo>qx2#@4QmQ>9Hkwlxm(`$&Ix<PsPnvOxH1nZXmD`*=|TbgE`+I3
zwF)e{ZLDSamy7gbG=h5z3W&_3$SJcJ))5e$zgU@vQWF)^#a*gM&%odbctStHCjm7W
zYO()C>QE0pl&66cXR(Wj1!%5j<%WF>3YN)r=(2F+KoEx#7bIb`bFrG>n|5P0$zg9b
zf^<|5f`rZ_3sRdFYjZb9ZK*c=W*@{U#sPw)Vd;ERB(MW+Exnj{7|FZeU2xO{D2%Np
zRaSzes+?j7N2U<aX%#>i%06WAb1AY2A0d2<@CkytUkwAbnt)W+q$pyqf@44hL-<YF
zP4XJvj}Ak3teDkO>IWZeZG?=Z6WRd+pKJnr^c&K|O^}2u4qh}?#QT8Dg#-T2b5AYe
vWlM|#1ZhiGMO+Epb{i4bQS1X0L+4C&6%aS!x;Evv=qQ=@uhK)>^wEC-bhhL4

delta 789
zcmZva-)qxQ6vuOuH0`>rmUOl4)>&DxG;_*U2P!ymV+g)DhbXj8v#jY#rAynJblo&c
znGF1ah~Bq>FS36?iv9)SKOpQOKKkI}*o)8JZw6Mxg!{=km-9X6o^$hk`TczM!tikT
zqIsj-IwuG}gufnz+t>KA_upps1LU@##XlZg4Uz>G56#K0pa*&@p-%x)>Zy@m5?Nvc
z>QBxiHI}=}Wm=F<@(;ZO6oIoq3AhKWu)|oAEySKp-hvz9@MOD19SS*3z*ii7t<i8a
z@i>>>JB(c+q{yb?U!xBYl>(B$T2DM#Bd02`b&itK+%)R#iYIN<EKT+kl-P?zl~mbL
zVlHzNfy=;sU==Wdb--e8@(E@pt-%J|CeXs3N^Nc@2%n3{jS!(WQ&O*p!``K4h|9jD
z_7*o0-Qn;^lWxKJ15NQ8rBUo|1CM}t01fd@9oUcAhtX-$W#2|G=a7y!fC2S&o?<oV
zhEeYe3S&94!-`|Oi%$`W`qY;O|B?D7y}@$BDjQD4;>$1XAEKXbB0W{uJ+AB`RTtO+
z{M`VZhCKtsfCA8F-E@l2VlQosIMTxbH(*Mp%-`T^`HA8xz47t5cpe>U#FLG!n(dE9
z3wvs(RyTE%8XbMTZRuvM?08bO;_&gVX3nsenWfAKmkH6Z7$PA>CbBdkN63(<igXtF
u;%q2?{Sq#Rp5X9;mT6FGY}5aoo<2u}WINQ8`Ttd?v;1mv<TU%7&;ACb4Y88|

diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py
index 0a75e66..fb5edec 100644
--- a/wildboottest/wildboottest.py
+++ b/wildboottest/wildboottest.py
@@ -355,4 +355,81 @@ def draw_weights(t : str, full_enumeration: bool, N_G_bootcluster: int, boot_ite
     return [v, boot_iter]
   
   
+def wildboottest(model, param, cluster, B, weights_type = 'rademacher',impose_null = True, bootstrap_type = '11', seed = None):
+  
+  '''
+  Run a wild cluster bootstrap based on an object of class 'statsmodels.regression.linear_model.OLS'
+  
+  Args: 
+    model(statsmodels.regression.linear_model.OLS'): A statsmodels regression object
+    param(str): A string of length one, containing the test parameter of interest
+    cluster(np.array): A numpy array of dimension one, containing the clustering variable.
+    B(int): The number of bootstrap iterations to run
+    weights_type(str): The type of bootstrap weights. Either 'rademacher', 'mammen', 'webb' or 'normal'. 
+                       'rademacher' by default.
+    impose_null(logical): Should the null hypothesis be imposed on the bootstrap dgp, or not?
+                          True by default. 
+    bootstrap_type(str). A string of length one. Allows to choose the bootstrap type 
+                         to be run. Either '11', '31', '13' or '33'. '11' by default.
+    seed(int). Option to provide a random seed. 
+    
+  Returns: 
+    A wild cluster bootstrapped p-value. 
+    
+  Example: 
+    
+    import statsmodels.api as sm
+    import numpy as np
+
+    np.random.seed(12312312)
+    N = 1000
+    k = 10
+    G= 10
+    X = np.random.normal(0, 1, N * k).reshape((N,k))
+    beta = np.random.normal(0,1,k)
+    beta[0] = 0.005
+    u = np.random.normal(0,1,N)
+    Y = 1 + X @ beta + u
+    cluster = np.random.choice(list(range(0,G)), N)
+
+    model = sm.OLS(Y, X)
+
+    boottest(model, param = "X1", cluster = cluster, B = 9999)
+    
+  '''
+
+  # set param to lowercase? model.data.xnames all seem to be lowercase?
+  param = str.lower(param)
+  # does model.exog already exclude missing values?
+  X = model.exog
+  # interestingly, the dependent variable is called 'endogeneous'
+  Y = model.endog
+  # weights not yet used, only as a placeholder
+  weights = model.weights
+  
+  xnames = model.data.xnames
+  ynames = model.data.ynames
+  
+  R = np.zeros(len(xnames))
+  R[xnames.index(param)] = 1
+  
+  # is it possible to fetch the clustering variables from the pre-processed data 
+  # frame, e.g. with 'excluding' observations with missings etc
+  # cluster = ...
+  
+  # set bootcluster == cluster for one-way clustering
+  bootcluster = cluster
+  
+  boot = Wildboottest(X = X, Y = Y, cluster = cluster, bootcluster = bootcluster, R = R, B = B, seed = seed)
+  boot.get_scores(bootstrap_type = bootstrap_type, impose_null = impose_null)
+  boot.get_weights(weights_type = weights_type)
+  boot.get_numer()
+  boot.get_denom()
+  boot.get_tboot()
+  boot.get_vcov()
+  boot.get_tstat()
+  boot.get_pvalue(pval_type = "two-tailed")
+  
+  return boot.pvalue
+  
 

From e4382d5ee350e910833fac45bcc65f17f741325e Mon Sep 17 00:00:00 2001
From: Aleksandr Michuda <amichuda@gmail.com>
Date: Mon, 17 Oct 2022 18:27:15 -0400
Subject: [PATCH 3/6] add test to bottom of file

---
 wildboottest/wildboottest.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py
index fb5edec..e62bd99 100644
--- a/wildboottest/wildboottest.py
+++ b/wildboottest/wildboottest.py
@@ -412,6 +412,8 @@ def wildboottest(model, param, cluster, B, weights_type = 'rademacher',impose_nu
   
   R = np.zeros(len(xnames))
   R[xnames.index(param)] = 1
+  # Just test for beta=0
+  # R = np.identity(len(xnames))
   
   # is it possible to fetch the clustering variables from the pre-processed data 
   # frame, e.g. with 'excluding' observations with missings etc
@@ -432,4 +434,33 @@ def wildboottest(model, param, cluster, B, weights_type = 'rademacher',impose_nu
   
   return boot.pvalue
   
+if __name__ == '__main__':
+    import statsmodels.api as sm
+    import numpy as np
 
+    np.random.seed(12312312)
+    N = 1000
+    k = 10
+    G= 10
+    X = np.random.normal(0, 1, N * k).reshape((N,k))
+    beta = np.random.normal(0,1,k)
+    beta[0] = 0.005
+    u = np.random.normal(0,1,N)
+    Y = 1 + X @ beta + u
+    cluster = np.random.choice(list(range(0,G)), N)
+
+    model = sm.OLS(Y, X)
+    
+    model.fit(cov_type='wildclusterbootstrap',
+              cov_kwds = {'cluster' : cluster,
+                          'B' : 9999,
+                          'weights_type' : 'rademacher',
+                          'impose_null' : True, 
+                          'bootstrap_type' : '11', 
+                          'seed' : None,
+                          'param' : 'X1'}).summary()
+    
+    
+
+    # boottest(model, param = "X1", cluster = cluster, B = 9999)
+    

From 36c6edf7b876491893972908ba3e9efe7c682691 Mon Sep 17 00:00:00 2001
From: Aleksandr Michuda <amichuda@gmail.com>
Date: Tue, 18 Oct 2022 13:46:33 -0400
Subject: [PATCH 4/6] small change to wildboottest and gitignore

---
 .gitignore                   | 4 +++-
 wildboottest/wildboottest.py | 3 +--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 571e4e6..7dd8363 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,6 @@
 .Rhistory
 .RData
 .Ruserdata
-.Rproj
\ No newline at end of file
+.Rproj
+__pycache__/
+*.egg-info/
\ No newline at end of file
diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py
index e62bd99..1098192 100644
--- a/wildboottest/wildboottest.py
+++ b/wildboottest/wildboottest.py
@@ -49,8 +49,7 @@ def __init__(self, X, Y, cluster, bootcluster, R, B, seed = None):
       self.N_G_bootcluster = len(bootclustid)
       self.G  = len(clustid)
 
-      k = R.shape[0]
-      self.k = k 
+      self.k = R.shape[0]
       self.B = B
       self.X = X
       self.R = R

From 4bc72df021d12aa7301d624e91c74d73d186f5e2 Mon Sep 17 00:00:00 2001
From: Aleksandr Michuda <amichuda@gmail.com>
Date: Tue, 18 Oct 2022 16:34:41 -0400
Subject: [PATCH 5/6] run boostrapping for all variables

---
 wildboottest/wildboottest.py | 70 +++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py
index 1098192..e65ef21 100644
--- a/wildboottest/wildboottest.py
+++ b/wildboottest/wildboottest.py
@@ -60,8 +60,8 @@ def __init__(self, X, Y, cluster, bootcluster, R, B, seed = None):
       y_list = []
       tXgXg_list = []
       tXgyg_list = []
-      tXX = np.zeros((k, k))
-      tXy = np.zeros(k)
+      tXX = np.zeros((self.k, self.k))
+      tXy = np.zeros(self.k)
       
       #all_cluster = np.unique(bootcluster)
       
@@ -354,7 +354,7 @@ def draw_weights(t : str, full_enumeration: bool, N_G_bootcluster: int, boot_ite
     return [v, boot_iter]
   
   
-def wildboottest(model, param, cluster, B, weights_type = 'rademacher',impose_null = True, bootstrap_type = '11', seed = None):
+def wildboottest(model, cluster, B, weights_type = 'rademacher',impose_null = True, bootstrap_type = '11', seed = None):
   
   '''
   Run a wild cluster bootstrap based on an object of class 'statsmodels.regression.linear_model.OLS'
@@ -397,8 +397,6 @@ def wildboottest(model, param, cluster, B, weights_type = 'rademacher',impose_nu
     
   '''
 
-  # set param to lowercase? model.data.xnames all seem to be lowercase?
-  param = str.lower(param)
   # does model.exog already exclude missing values?
   X = model.exog
   # interestingly, the dependent variable is called 'endogeneous'
@@ -409,29 +407,41 @@ def wildboottest(model, param, cluster, B, weights_type = 'rademacher',impose_nu
   xnames = model.data.xnames
   ynames = model.data.ynames
   
-  R = np.zeros(len(xnames))
-  R[xnames.index(param)] = 1
-  # Just test for beta=0
-  # R = np.identity(len(xnames))
+  pvalues = []
+  tstats = []
   
-  # is it possible to fetch the clustering variables from the pre-processed data 
-  # frame, e.g. with 'excluding' observations with missings etc
-  # cluster = ...
+  # hack to make a single param specified allow to be looped.
+  if isinstance(param, str):
+    param = [param]
   
-  # set bootcluster == cluster for one-way clustering
-  bootcluster = cluster
-  
-  boot = Wildboottest(X = X, Y = Y, cluster = cluster, bootcluster = bootcluster, R = R, B = B, seed = seed)
-  boot.get_scores(bootstrap_type = bootstrap_type, impose_null = impose_null)
-  boot.get_weights(weights_type = weights_type)
-  boot.get_numer()
-  boot.get_denom()
-  boot.get_tboot()
-  boot.get_vcov()
-  boot.get_tstat()
-  boot.get_pvalue(pval_type = "two-tailed")
+  for param in xnames:
+    R = np.zeros(len(xnames))
+    R[xnames.index(param)] = 1
+    # Just test for beta=0
+    # R = np.identity(len(xnames))
+    
+    # is it possible to fetch the clustering variables from the pre-processed data 
+    # frame, e.g. with 'excluding' observations with missings etc
+    # cluster = ...
+    
+    # set bootcluster == cluster for one-way clustering
+    bootcluster = cluster
+    
+    boot = Wildboottest(X = X, Y = Y, cluster = cluster, bootcluster = bootcluster, 
+                        R = R, B = B, seed = seed)
+    boot.get_scores(bootstrap_type = bootstrap_type, impose_null = impose_null)
+    boot.get_weights(weights_type = weights_type)
+    boot.get_numer()
+    boot.get_denom()
+    boot.get_tboot()
+    boot.get_vcov()
+    boot.get_tstat()
+    boot.get_pvalue(pval_type = "two-tailed")
+    
+    pvalues.append(boot.pvalue)
+    tstats.append(boot.t_stat)
   
-  return boot.pvalue
+  return np.array(pvalues), np.array(tstats).flatten()
   
 if __name__ == '__main__':
     import statsmodels.api as sm
@@ -450,14 +460,18 @@ def wildboottest(model, param, cluster, B, weights_type = 'rademacher',impose_nu
 
     model = sm.OLS(Y, X)
     
-    model.fit(cov_type='wildclusterbootstrap',
+    print("--- WCB ---")
+    print(model.fit().summary())
+    
+    print("--- NonRobust ---")
+    print(model.fit(cov_type='wildclusterbootstrap',
               cov_kwds = {'cluster' : cluster,
                           'B' : 9999,
                           'weights_type' : 'rademacher',
                           'impose_null' : True, 
                           'bootstrap_type' : '11', 
-                          'seed' : None,
-                          'param' : 'X1'}).summary()
+                          'seed' : None}).summary())
+  
     
     
 

From 5ad5410d4a1c349a9997041f957652ea92eef2bb Mon Sep 17 00:00:00 2001
From: Aleksandr Michuda <amichuda@gmail.com>
Date: Tue, 18 Oct 2022 17:01:57 -0400
Subject: [PATCH 6/6] allow pandas dataframes as input

---
 wildboottest/wildboottest.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py
index e65ef21..eab1759 100644
--- a/wildboottest/wildboottest.py
+++ b/wildboottest/wildboottest.py
@@ -354,7 +354,7 @@ def draw_weights(t : str, full_enumeration: bool, N_G_bootcluster: int, boot_ite
     return [v, boot_iter]
   
   
-def wildboottest(model, cluster, B, weights_type = 'rademacher',impose_null = True, bootstrap_type = '11', seed = None):
+def wildboottest(model, cluster, B, param = None, weights_type = 'rademacher',impose_null = True, bootstrap_type = '11', seed = None):
   
   '''
   Run a wild cluster bootstrap based on an object of class 'statsmodels.regression.linear_model.OLS'
@@ -410,15 +410,11 @@ def wildboottest(model, cluster, B, weights_type = 'rademacher',impose_null = Tr
   pvalues = []
   tstats = []
   
-  # hack to make a single param specified allow to be looped.
-  if isinstance(param, str):
-    param = [param]
-  
-  for param in xnames:
+  def generate_stats(param):
+
     R = np.zeros(len(xnames))
     R[xnames.index(param)] = 1
     # Just test for beta=0
-    # R = np.identity(len(xnames))
     
     # is it possible to fetch the clustering variables from the pre-processed data 
     # frame, e.g. with 'excluding' observations with missings etc
@@ -440,6 +436,16 @@ def wildboottest(model, cluster, B, weights_type = 'rademacher',impose_null = Tr
     
     pvalues.append(boot.pvalue)
     tstats.append(boot.t_stat)
+    
+    return pvalues, tstats
+    
+  if param is None:
+    for x in xnames:
+      pvalues, tstats = generate_stats(x)
+  elif isinstance(param, str):
+    pvalues, tstats = generate_stats(param)
+  else:
+    raise Exception("`param` not correctly specified")
   
   return np.array(pvalues), np.array(tstats).flatten()
   
@@ -457,8 +463,12 @@ def wildboottest(model, cluster, B, weights_type = 'rademacher',impose_null = Tr
     u = np.random.normal(0,1,N)
     Y = 1 + X @ beta + u
     cluster = np.random.choice(list(range(0,G)), N)
+    
+    X_df = pd.DataFrame(data=X, columns = [f"col_{i}" for i in range(k)])
+    
+    Y_df = pd.DataFrame(data=Y, columns = ['outcome'])
 
-    model = sm.OLS(Y, X)
+    model = sm.OLS(Y_df, X_df)
     
     print("--- WCB ---")
     print(model.fit().summary())