Skip to content

Commit

Permalink
Merge pull request BiomedSciAI#5 from CausalDev/restructure_evaluation
Browse files Browse the repository at this point in the history
Evaluation module
  • Loading branch information
ehudkr authored and GitHub Enterprise committed Feb 21, 2019
2 parents f0d2f73 + dacb64a commit 5ed91fc
Show file tree
Hide file tree
Showing 13 changed files with 2,622 additions and 26 deletions.
20 changes: 8 additions & 12 deletions causallib/estimation/BaseEstimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ def estimate_effect(self, outcome_1, outcome_2, effect_types="diff"):
A DataFrame if individual effect (input is a vector), where columns are effect
types and rows are the effects for each individual.
Always: Value type is the same as the type of outcome_1 and outcome_2.
Examples:
>> estimate_effect(0.3, 0.6)
>> {"diff": -0.3, # 0.3 - 0.6
"ratio": 0.5, # 0.3 / 0.6
"or": 0.2857} # Odds-Ratio(0.3, 0.6)
Examples:
>>> from causallib.estimation.BaseEstimator import EffectEstimator
>>> effect_estimator = EffectEstimator()
>>> effect_estimator.estimate_effect(0.3, 0.6)
>>> {"diff": -0.3, # 0.3 - 0.6
"ratio": 0.5, # 0.3 / 0.6
"or": 0.2857} # Odds-Ratio(0.3, 0.6)
"""
effect_types = [effect_types] if isscalar(effect_types) else effect_types
results = {}
Expand All @@ -68,8 +70,7 @@ def estimate_effect(self, outcome_1, outcome_2, effect_types="diff"):
return results


# TODO: maybe remove the inheritance from abc.ABC. Leave only the method decorator.
class PopulationOutcomeEstimator(abc.ABC, EffectEstimator):
class PopulationOutcomeEstimator(EffectEstimator):
"""
Interface for estimating aggregated outcome over different subgroups in the dataset.
"""
Expand Down Expand Up @@ -158,11 +159,6 @@ def estimate_effect(self, outcome1, outcome2, agg="population", effect_types="di
A DataFrame if individual effect (input is a vector), where columns are effect
types and rows are the effects for each individual.
Always: Value type is the same as the type of outcome_1 and outcome_2.
Examples:
>> estimate_effect(0.3, 0.6)
>> {"diff": -0.3, # 0.3 - 0.6
"ratio": 0.5, # 0.3 / 0.6
"or": 0.2857} # Odds-Ratio(0.3, 0.6)
"""
if agg == "population":
outcome1 = self._aggregate_population_outcome(outcome1)
Expand Down
21 changes: 17 additions & 4 deletions causallib/estimation/IPW.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
from .BaseEstimator import PopulationOutcomeEstimator
from .BaseWeight import PropensityEstimator
from ..utils import general_tools as g_tools
from ..utils.StatUtils import robust_lookup

import numpy as np
import pandas as pd

import warnings

# TODO: implement a two-caliper truncation, one lower bound truncation epsilon and an upper bound one.


class IPW(PropensityEstimator, PopulationOutcomeEstimator):
"""
Expand All @@ -32,6 +35,7 @@ def __init__(self, learner, learner_kws=None, truncate_eps=None, use_stabilized=
See Also: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4351790/#S6title
"""
super(IPW, self).__init__(learner, learner_kws, use_stabilized)
self.__check_truncation_value_is_legal(truncate_eps)
self.truncate_eps = truncate_eps

def fit(self, X, a):
Expand Down Expand Up @@ -74,8 +78,7 @@ def compute_weights(self, X, a, treatment_values=None, truncate_eps=None, use_st
"""
weight_matrix = self.compute_weight_matrix(X, a, truncate_eps, use_stabilized)
if treatment_values is None:
weights = weight_matrix.lookup(a.index, a) # lookup table: take the column a[i] for every i in index(a).
weights = pd.Series(weights, index=a.index)
weights = robust_lookup(weight_matrix, a) # lookup table: take the column a[i] for every i in index(a).
else:
weights = weight_matrix[treatment_values]
return weights
Expand Down Expand Up @@ -149,7 +152,7 @@ def compute_propensity(self, X, a, treatment_values=None, truncate_eps=None):
probabilities = probabilities[treatment_values]
return probabilities

def compute_propensity_matrix(self, X, a, truncate_eps=None):
def compute_propensity_matrix(self, X, a=None, truncate_eps=None):
"""
Args:
Expand All @@ -163,9 +166,14 @@ def compute_propensity_matrix(self, X, a, truncate_eps=None):
very treatment.
"""
truncate_eps = self.truncate_eps if truncate_eps is None else truncate_eps
self.__check_truncation_value_is_legal(truncate_eps)

probabilities = self._predict(X)
if truncate_eps is not None:
if truncate_eps is not None: # since truncation value is legal, it must be a float.
print("Fraction of values being truncated: {:.5f}."
.format(probabilities.apply(lambda x: ~x.between(truncate_eps, 1-truncate_eps)).sum().sum() /
probabilities.size)) # TODO: do as log

probabilities = probabilities.clip(lower=truncate_eps, upper=1 - truncate_eps)

return probabilities
Expand Down Expand Up @@ -195,3 +203,8 @@ def estimate_population_outcome(self, X, a, y, w=None, treatment_values=None):
res[treatment_value] = np.average(y[a == treatment_value], weights=weights[a == treatment_value])
res = pd.Series(res)
return res

@staticmethod
def __check_truncation_value_is_legal(truncate_eps):
if truncate_eps is not None and not 0 <= truncate_eps <= 0.5:
raise AssertionError("Provided value for truncation (truncate_eps) should be between 0.0 and 0.5")
Empty file.
Loading

0 comments on commit 5ed91fc

Please sign in to comment.