Merge pull request #341 from aai-institute/feature/shapley-games

Toy Shapley games
aai-institute · Jan 23, 2024 · f2f7466 · f2f7466
2 parents 3e7b4c8 + 023040f
commit f2f7466
Show file tree

Hide file tree

Showing 20 changed files with 1,690 additions and 772 deletions.
diff --git a/.test_durations b/.test_durations
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,16 +2,23 @@
 
 ## Unreleased
 
+### Added
+
+- Implement new method: `EkfacInfluence`
+  [PR #451](https://github.com/aai-institute/pyDVL/issues/451)
+- New notebook to showcase ekfac for LLMs
+  [PR #483](https://github.com/aai-institute/pyDVL/pull/483)
+- Implemented exact games in Castro et al. 2009 and 2017
+  [PR #341](https://github.com/appliedAI-Initiative/pyDVL/pull/341)
+
 ### Fixed
 
 - Bug in using `DaskInfluenceCalculator` with `TorchnumpyConverter`
   for single dimensional arrays [PR #485](https://github.com/aai-institute/pyDVL/pull/485)
 - Fix implementations of `to` methods of `TorchInfluenceFunctionModel` implementations
   [PR #487](https://github.com/aai-institute/pyDVL/pull/487)
-- Implement new method: `EkfacInfluence`
-  [PR #476](https://github.com/aai-institute/pyDVL/pull/476)
-- New notebook to showcase ekfac for LLMs
-  [PR #483](https://github.com/aai-institute/pyDVL/pull/483)
+- Fixed bug with checking for converged values in semivalues
+  [PR #341](https://github.com/appliedAI-Initiative/pyDVL/pull/341)
 
 ## 0.8.0 - 🆕 New interfaces, scaling computation, bug fixes and improvements 🎁
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -343,8 +343,12 @@ runs](#skipping-ci-runs)).
 3. We split the tests based on their duration into groups and run them in parallel.
 
    For that we use [pytest-split](https://jerry-git.github.io/pytest-split)
-   to first store the duration of all tests with `pytest --store-durations pytest --slow-tests`
+   to first store the duration of all tests with
+   `tox -e tests -- --store-durations --slow-tests`
    in a `.test_durations` file.
+
+   Alternatively, we can use pytest directly:
+   `pytest --store-durations --slow-tests`.
 
    > **Note** This does not have to be done each time a new test or test case
    > is added. For new tests and test cases pytest-split assumes
@@ -359,11 +363,14 @@ runs](#skipping-ci-runs)).
    Then we can have as many splits as we want:
 
    ```shell
-   pytest --splits 3 --group 1
-   pytest --splits 3 --group 2
-   pytest --splits 3 --group 3
+   tox -e tests -- --splits 3 --group 1
+   tox -e tests -- --splits 3 --group 2
+   tox -e tests -- --splits 3 --group 3
    ```
 
+   Alternatively, we can use pytest directly:
+   `pytest --splits 3 --group 1`.
+
    Each one of these commands should be run in a separate shell/job
    to run the test groups in parallel and decrease the total runtime.
 

diff --git a/src/pydvl/utils/types.py b/src/pydvl/utils/types.py
@@ -23,7 +23,7 @@
 ]
 
 IndexT = TypeVar("IndexT", bound=np.int_)
-NameT = TypeVar("NameT", bound=np.object_)
+NameT = TypeVar("NameT", np.object_, np.int_)
 R = TypeVar("R", covariant=True)
 Seed = Union[int, Generator]
 

diff --git a/src/pydvl/utils/utility.py b/src/pydvl/utils/utility.py
@@ -38,7 +38,7 @@
 from pydvl.utils.score import Scorer
 from pydvl.utils.types import SupervisedModel
 
-__all__ = ["Utility", "DataUtilityLearning", "MinerGameUtility", "GlovesGameUtility"]
+__all__ = ["Utility", "DataUtilityLearning"]
 
 logger = logging.getLogger(__name__)
 
@@ -356,120 +356,3 @@ def __call__(self, indices: Iterable[int]) -> float:
     def data(self) -> Dataset:
         """Returns the wrapped utility's [Dataset][pydvl.utils.dataset.Dataset]."""
         return self.utility.data
-
-
-class MinerGameUtility(Utility):
-    r"""Toy game utility that is used for testing and demonstration purposes.
-
-    Consider a group of n miners, who have discovered large bars of gold.
-
-    If two miners can carry one piece of gold, then the payoff of a
-    coalition $S$ is:
-
-    $${
-    v(S) = \left\{\begin{array}{lll}
-    \mid S \mid / 2 & \text{, if} & \mid S \mid \text{ is even} \\
-    ( \mid S \mid - 1)/2 & \text{, if} & \mid S \mid \text{ is odd}
-    \end{array}\right.
-    }$$
-
-    If there are more than two miners and there is an even number of miners,
-    then the core consists of the single payoff where each miner gets 1/2.
-
-    If there is an odd number of miners, then the core is empty.
-
-    Taken from [Wikipedia](https://en.wikipedia.org/wiki/Core_(game_theory))
-
-    Args:
-        n_miners: Number of miners that participate in the game.
-    """
-
-    def __init__(self, n_miners: int, **kwargs):
-        if n_miners <= 2:
-            raise ValueError(f"n_miners, {n_miners} should be > 2")
-        self.n_miners = n_miners
-
-        x = np.arange(n_miners)[..., np.newaxis]
-        # The y values don't matter here
-        y = np.zeros_like(x)
-
-        self.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)
-
-    def __call__(self, indices: Iterable[int]) -> float:
-        n = len(tuple(indices))
-        if n % 2 == 0:
-            return n / 2
-        else:
-            return (n - 1) / 2
-
-    def _initialize_utility_wrapper(self):
-        pass
-
-    def exact_least_core_values(self) -> Tuple[NDArray[np.float_], float]:
-        if self.n_miners % 2 == 0:
-            values = np.array([0.5] * self.n_miners)
-            subsidy = 0.0
-        else:
-            values = np.array(
-                [(self.n_miners - 1) / (2 * self.n_miners)] * self.n_miners
-            )
-            subsidy = (self.n_miners - 1) / (2 * self.n_miners)
-        return values, subsidy
-
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}(n={self.n_miners})"
-
-
-class GlovesGameUtility(Utility):
-    r"""Toy game utility that is used for testing and demonstration purposes.
-
-    In this game, some players have a left glove and others a right glove.
-    Single gloves have a worth of zero while pairs have a worth of 1.
-
-    The payoff of a coalition $S$ is:
-
-    $${
-    v(S) = \min( \mid S \cap L \mid, \mid S \cap R \mid )
-    }$$
-
-    Where $L$, respectively $R$, is the set of players with left gloves,
-    respectively right gloves.
-
-    Args:
-        left: Number of players with a left glove.
-        right: Number of player with a right glove.
-
-    """
-
-    def __init__(self, left: int, right: int, **kwargs):
-        self.left = left
-        self.right = right
-
-        x = np.empty(left + right)[..., np.newaxis]
-        # The y values don't matter here
-        y = np.zeros_like(x)
-
-        self.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)
-
-    def __call__(self, indices: Iterable[int]) -> float:
-        left_sum = float(np.sum(np.asarray(indices) < self.left))
-        right_sum = float(np.sum(np.asarray(indices) >= self.left))
-        return min(left_sum, right_sum)
-
-    def _initialize_utility_wrapper(self):
-        pass
-
-    def exact_least_core_values(self) -> Tuple[NDArray[np.float_], float]:
-        if self.left == self.right:
-            subsidy = -0.5
-            values = np.array([0.5] * (self.left + self.right))
-        elif self.left < self.right:
-            subsidy = 0.0
-            values = np.array([1.0] * self.left + [0.0] * self.right)
-        else:
-            subsidy = 0.0
-            values = np.array([0.0] * self.left + [1.0] * self.right)
-        return values, subsidy
-
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}(L={self.left}, R={self.right})"