Merge pull request BiomedSciAI#5 from CausalDev/restructure_evaluation

Evaluation module
mmdanziger · Feb 21, 2019 · 5ed91fc · 5ed91fc
2 parents f0d2f73 + dacb64a
commit 5ed91fc
Show file tree

Hide file tree

Showing 13 changed files with 2,622 additions and 26 deletions.
diff --git a/causallib/estimation/BaseEstimator.py b/causallib/estimation/BaseEstimator.py
@@ -51,11 +51,13 @@ def estimate_effect(self, outcome_1, outcome_2, effect_types="diff"):
                                       A DataFrame if individual effect (input is a vector) where columns are effects
                                       types and rows are effect in each individual.
                                       Always: Value type is the same as outcome_1 and outcome_2 type.
-                                      Examples:
-                                          >> estimate_effect(0.3, 0.6)
-                                          >> {"diff": -0.3,    # 0.3 - 0.6
-                                              "ratio": 0.5,    # 0.3 / 0.6
-                                              "or": 0.2857}    # Odds-Ratio(0.3, 0.6)
+        Examples:
+            >>> from causallib.estimation.BaseEstimator import EffectEstimator
+            >>> effect_estimator = EffectEstimator()
+            >>> effect_estimator.estimate_effect(0.3, 0.6)
+            >>> {"diff": -0.3,    # 0.3 - 0.6
+                 "ratio": 0.5,    # 0.3 / 0.6
+                 "or": 0.2857}    # Odds-Ratio(0.3, 0.6)
         """
         effect_types = [effect_types] if isscalar(effect_types) else effect_types
         results = {}
@@ -68,8 +70,7 @@ def estimate_effect(self, outcome_1, outcome_2, effect_types="diff"):
         return results
 
 
-# TODO: maybe remove the inheritance from abc.ABC. Leave only the method decorator.
-class PopulationOutcomeEstimator(abc.ABC, EffectEstimator):
+class PopulationOutcomeEstimator(EffectEstimator):
     """
     Interface for estimating aggregated outcome over different subgroups in the dataset.
     """
@@ -158,11 +159,6 @@ def estimate_effect(self, outcome1, outcome2, agg="population", effect_types="di
                                       A DataFrame if individual effect (input is a vector) where columns are effects
                                       types and rows are effect in each individual.
                                       Always: Value type is the same as outcome_1 and outcome_2 type.
-                                      Examples:
-                                          >> estimate_effect(0.3, 0.6)
-                                          >> {"diff": -0.3,    # 0.3 - 0.6
-                                              "ratio": 0.5,    # 0.3 / 0.6
-                                              "or": 0.2857}    # Odds-Ratio(0.3, 0.6)
         """
         if agg == "population":
             outcome1 = self._aggregate_population_outcome(outcome1)

diff --git a/causallib/estimation/IPW.py b/causallib/estimation/IPW.py
@@ -8,12 +8,15 @@
 from .BaseEstimator import PopulationOutcomeEstimator
 from .BaseWeight import PropensityEstimator
 from ..utils import general_tools as g_tools
+from ..utils.StatUtils import robust_lookup
 
 import numpy as np
 import pandas as pd
 
 import warnings
 
+# TODO: implement a two-caliper truncation, one lower bound truncation epsilon and an upper bound one.
+
 
 class IPW(PropensityEstimator, PopulationOutcomeEstimator):
     """
@@ -32,6 +35,7 @@ def __init__(self, learner, learner_kws=None, truncate_eps=None, use_stabilized=
                                    See Also: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4351790/#S6title
         """
         super(IPW, self).__init__(learner, learner_kws, use_stabilized)
+        self.__check_truncation_value_is_legal(truncate_eps)
         self.truncate_eps = truncate_eps
 
     def fit(self, X, a):
@@ -74,8 +78,7 @@ def compute_weights(self, X, a, treatment_values=None, truncate_eps=None, use_st
         """
         weight_matrix = self.compute_weight_matrix(X, a, truncate_eps, use_stabilized)
         if treatment_values is None:
-            weights = weight_matrix.lookup(a.index, a)   # lookup table: take the column a[i] for every i in index(a).
-            weights = pd.Series(weights, index=a.index)
+            weights = robust_lookup(weight_matrix, a)   # lookup table: take the column a[i] for every i in index(a).
         else:
             weights = weight_matrix[treatment_values]
         return weights
@@ -149,7 +152,7 @@ def compute_propensity(self, X, a, treatment_values=None, truncate_eps=None):
         probabilities = probabilities[treatment_values]
         return probabilities
 
-    def compute_propensity_matrix(self, X, a, truncate_eps=None):
+    def compute_propensity_matrix(self, X, a=None, truncate_eps=None):
         """
 
         Args:
@@ -163,9 +166,14 @@ def compute_propensity_matrix(self, X, a, truncate_eps=None):
                           every treatment.
         """
         truncate_eps = self.truncate_eps if truncate_eps is None else truncate_eps
+        self.__check_truncation_value_is_legal(truncate_eps)
 
         probabilities = self._predict(X)
-        if truncate_eps is not None:
+        if truncate_eps is not None:        # since truncation value is legal, it must be a float.
+            print("Fraction of values being truncated: {:.5f}."
+                  .format(probabilities.apply(lambda x: ~x.between(truncate_eps, 1-truncate_eps)).sum().sum() /
+                          probabilities.size))                          # TODO: do as log
+
             probabilities = probabilities.clip(lower=truncate_eps, upper=1 - truncate_eps)
 
         return probabilities
@@ -195,3 +203,8 @@ def estimate_population_outcome(self, X, a, y, w=None, treatment_values=None):
             res[treatment_value] = np.average(y[a == treatment_value], weights=weights[a == treatment_value])
         res = pd.Series(res)
         return res
+
+    @staticmethod
+    def __check_truncation_value_is_legal(truncate_eps):
+        if truncate_eps is not None and not 0 <= truncate_eps <= 0.5:
+            raise AssertionError("Provided value for truncation (truncate_eps) should be between 0.0 and 0.5")
diff --git a/causallib/evaluation/__init__.py b/causallib/evaluation/__init__.py