Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pandas>2 and networkx>3 for simulation module and some unit tests #71

Merged
merged 6 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions causallib/simulation/CausalSimulator3.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def __init__(self, topology, var_types, prob_categories, link_types, snr, treatm

# check that each effect modifier is independent of the treatment and affects only the outcome:
for i in self.effmod_indices:
successors = self.graph_topology.successors(i)
successors = list(self.graph_topology.successors(i))
if len(successors) == 0 or self.outcome_indices.intersection(successors).size < 1:
raise ValueError("Effect modifier variable {name} must affect an outcome variable".format(name=i))
ancestors = nx.ancestors(self.graph_topology, i)
Expand Down Expand Up @@ -441,7 +441,7 @@ def generate_data(self, X_given=None, num_samples=None, random_seed=None):

# generate latent continuous covariates - every variable is guaranteed to have a population variance of 1.0
# X_latent = pd.DataFrame(index=patients_index, columns=self.var_types.index)
X = pd.DataFrame(index=patients_index, columns=self.var_types.index)
X = pd.DataFrame(index=patients_index, columns=self.var_types.index, dtype=float)
if X_given is not None: # if a dataset is given, integrate it to the current dataset being build.
X.loc[:, X_given.columns] = X_given
for col in X_given.columns:
Expand Down Expand Up @@ -1342,7 +1342,7 @@ def _poly_linking(X_parents, beta=None):
beta = pd.DataFrame(data=np.random.normal(loc=0.0, scale=4.0, size=(degree, X_parents.columns.size)),
columns=X_parents.columns, index=np.arange(degree))

result_polynomial = pd.DataFrame(data=None, index=X_parents.index, columns=X_parents.columns)
result_polynomial = pd.DataFrame(data=None, index=X_parents.index, columns=X_parents.columns, dtype=float)
degrees = beta.index.to_series()
# Apply a polynomial to every parent variable
for var_name, col in X_parents.items():
Expand Down
96 changes: 96 additions & 0 deletions causallib/tests/test_causal_simulator3.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,82 @@ def test_linear_linking(self):
msg="discovered rank of matrix is {emp} instead of {des}."
"so the linear linking does not work properly".format(emp=rank, des=2))

def test_affine_linking(self):
    """Check the numerical rank of data generated with ``"affine"`` links.

    Builds a three-variable simulator (covariate, treatment, outcome),
    generates ``self.NUM_SAMPLES`` samples and asserts that the generated
    data matrix has exactly three singular values above a small threshold.
    """
    # NOTE(review): presumably topology[i, j] = True marks variable j as a
    # parent of variable i, i.e. the outcome (index 2) depends on the
    # covariate (0) and the treatment (1) -- confirm against CausalSimulator3.
    topology = np.zeros((3, 3), dtype=bool)
    topology[2, 0] = topology[2, 1] = True
    var_types = ["covariate", "treatment", "outcome"]
    snr = 1
    prob_cat = [None, [0.5, 0.5], None]
    treatment_importance = None
    sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat,
              link_types="affine", treatment_importances=treatment_importance,
              outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes)
    # Only the generated data matrix is inspected; the other two return
    # values are not needed here.
    X, _, _ = sim.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank: count the singular values above a small threshold.
    singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False)
    eps = 1e-10
    rank = np.sum(singular_values > eps)
    # The two literals are concatenated before .format() is applied, so the
    # separator between the clauses has to be part of the text itself.
    self.assertEqual(rank, 3,
                     msg="discovered rank of matrix is {emp} instead of {des}, "
                         "so the affine linking does not work properly".format(emp=rank, des=3))

def test_poly_linking(self):
    """Check the numerical rank of data generated with ``"poly"`` links.

    Builds a three-variable simulator (covariate, treatment, outcome),
    generates ``self.NUM_SAMPLES`` samples and asserts that the generated
    data matrix has exactly three singular values above a small threshold.
    """
    # NOTE(review): presumably topology[i, j] = True marks variable j as a
    # parent of variable i, i.e. the outcome (index 2) depends on the
    # covariate (0) and the treatment (1) -- confirm against CausalSimulator3.
    topology = np.zeros((3, 3), dtype=bool)
    topology[2, 0] = topology[2, 1] = True
    var_types = ["covariate", "treatment", "outcome"]
    snr = 1
    prob_cat = [None, [0.5, 0.5], None]
    treatment_importance = None
    sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat,
              link_types="poly", treatment_importances=treatment_importance,
              outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes)
    # Only the generated data matrix is inspected; the other two return
    # values are not needed here.
    X, _, _ = sim.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank: count the singular values above a small threshold.
    singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False)
    eps = 1e-10
    rank = np.sum(singular_values > eps)
    # The two literals are concatenated before .format() is applied, so the
    # separator between the clauses has to be part of the text itself.
    self.assertEqual(rank, 3,
                     msg="discovered rank of matrix is {emp} instead of {des}, "
                         "so the poly linking does not work properly".format(emp=rank, des=3))

def test_exp_linking(self):
    """Check the numerical rank of data generated with ``"exp"`` links.

    Builds a three-variable simulator (covariate, treatment, outcome),
    generates ``self.NUM_SAMPLES`` samples and asserts that the generated
    data matrix has exactly three singular values above a small threshold.
    """
    # NOTE(review): presumably topology[i, j] = True marks variable j as a
    # parent of variable i, i.e. the outcome (index 2) depends on the
    # covariate (0) and the treatment (1) -- confirm against CausalSimulator3.
    topology = np.zeros((3, 3), dtype=bool)
    topology[2, 0] = topology[2, 1] = True
    var_types = ["covariate", "treatment", "outcome"]
    snr = 1
    prob_cat = [None, [0.5, 0.5], None]
    treatment_importance = None
    sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat,
              link_types="exp", treatment_importances=treatment_importance,
              outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes)
    # Only the generated data matrix is inspected; the other two return
    # values are not needed here.
    X, _, _ = sim.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank: count the singular values above a small threshold.
    singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False)
    eps = 1e-10
    rank = np.sum(singular_values > eps)
    # The two literals are concatenated before .format() is applied, so the
    # separator between the clauses has to be part of the text itself.
    self.assertEqual(rank, 3,
                     msg="discovered rank of matrix is {emp} instead of {des}, "
                         "so the exp linking does not work properly".format(emp=rank, des=3))

def test_log_linking(self):
    """Check the numerical rank of data generated with ``"log"`` links.

    Builds a three-variable simulator (covariate, treatment, outcome),
    generates ``self.NUM_SAMPLES`` samples and asserts that the generated
    data matrix has exactly three singular values above a small threshold.
    """
    # NOTE(review): presumably topology[i, j] = True marks variable j as a
    # parent of variable i, i.e. the outcome (index 2) depends on the
    # covariate (0) and the treatment (1) -- confirm against CausalSimulator3.
    topology = np.zeros((3, 3), dtype=bool)
    topology[2, 0] = topology[2, 1] = True
    var_types = ["covariate", "treatment", "outcome"]
    snr = 1
    prob_cat = [None, [0.5, 0.5], None]
    treatment_importance = None
    sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat,
              link_types="log", treatment_importances=treatment_importance,
              outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes)
    # Only the generated data matrix is inspected; the other two return
    # values are not needed here.
    X, _, _ = sim.generate_data(num_samples=self.NUM_SAMPLES)

    # Numerical rank: count the singular values above a small threshold.
    singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False)
    eps = 1e-10
    rank = np.sum(singular_values > eps)
    # The two literals are concatenated before .format() is applied, so the
    # separator between the clauses has to be part of the text itself.
    self.assertEqual(rank, 3,
                     msg="discovered rank of matrix is {emp} instead of {des}, "
                         "so the log linking does not work properly".format(emp=rank, des=3))

def test_treatment_logistic(self):
topology = np.zeros((6, 6), dtype=bool)
topology[2, 0] = topology[3, 0] = topology[2, 1] = topology[3, 1] = topology[4, 2] = topology[5, 3] = True
Expand Down Expand Up @@ -533,6 +609,26 @@ def test_censoring(self):
# TODO: test different link types
# TODO: test marginal structural model (both in continuous, dichotomous and probability settings)

def test_effect_modifier(self):
    """Check that only the effect modifier gets per-column outcome coefficients.

    Builds a four-variable simulator (effect modifier, treatment, outcome,
    covariate) whose outcome uses a ``"marginal_structural_model"`` link,
    generates data, and inspects the outcome's entry in ``sim.linking_coefs``:

    * the effect-modifier row (label 0) must differ between columns 0 and 1;
    * the covariate row (label 3) must be equal in columns 0 and 1.
    """
    # NOTE(review): presumably topology[i, j] = True marks variable j as a
    # parent of variable i, so the outcome (index 2) depends on the effect
    # modifier (0), the treatment (1) and the covariate (3) -- confirm
    # against CausalSimulator3.
    topology = np.zeros((4, 4), dtype=bool)
    topology[2, 0] = topology[2, 1] = topology[2, 3] = True
    var_types = ["effect_modifier", "treatment", "outcome", "covariate"]
    snr = 1
    prob_cat = [None, [0.5, 0.5], None, None]
    treatment_importance = None
    sim = CS3(topology=topology, var_types=var_types, prob_categories=prob_cat,
              link_types=["linear", "linear", "marginal_structural_model", "linear"],
              treatment_importances=treatment_importance,
              outcome_types="continuous", snr=snr, effect_sizes=None)
    # The generated data itself is not inspected.
    # NOTE(review): the call is kept because it presumably populates
    # sim.linking_coefs -- confirm against generate_data.
    sim.generate_data(num_samples=self.NUM_SAMPLES)

    # Coefficient table of the outcome variable (index 2).
    # NOTE(review): columns 0 and 1 look like the two potential outcomes
    # (one per treatment category) -- confirm.
    outcome_coefs = sim.linking_coefs[2]
    effmod_beta_0 = outcome_coefs.loc[0, 0]
    effmod_beta_1 = outcome_coefs.loc[0, 1]
    covariate_beta_0 = outcome_coefs.loc[3, 0]
    covariate_beta_1 = outcome_coefs.loc[3, 1]

    self.assertNotEqual(effmod_beta_0, effmod_beta_1,
                        msg="coefficients for potential outcomes are the same: {beta_1} = {beta_0}, "
                            "so the effect modifier does not behave properly".format(beta_0=effmod_beta_0,
                                                                                      beta_1=effmod_beta_1))
    # Report the covariate's own coefficients (row 3), i.e. the values
    # that are actually being compared by this assertion.
    self.assertEqual(covariate_beta_0, covariate_beta_1,
                     msg="coefficients for potential outcomes are not the same: {beta_1} != {beta_0}, "
                         "so the covariate does not behave properly".format(beta_0=covariate_beta_0,
                                                                            beta_1=covariate_beta_1))


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
unittest.main()
Loading