Skip to content

Commit

Permalink
Merge pull request #341 from aai-institute/feature/shapley-games
Browse files Browse the repository at this point in the history
Toy Shapley games
  • Loading branch information
AnesBenmerzoug authored Jan 23, 2024
2 parents 3e7b4c8 + 023040f commit f2f7466
Show file tree
Hide file tree
Showing 20 changed files with 1,690 additions and 772 deletions.
1,015 changes: 696 additions & 319 deletions .test_durations

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,23 @@

## Unreleased

### Added

- Implement new method: `EkfacInfluence`
[PR #451](https://github.com/aai-institute/pyDVL/issues/451)
- New notebook to showcase ekfac for LLMs
[PR #483](https://github.com/aai-institute/pyDVL/pull/483)
- Implemented exact games in Castro et al. 2009 and 2017
[PR #341](https://github.com/appliedAI-Initiative/pyDVL/pull/341)

### Fixed

- Bug in using `DaskInfluenceCalculator` with `TorchnumpyConverter`
for single dimensional arrays [PR #485](https://github.com/aai-institute/pyDVL/pull/485)
- Fix implementations of `to` methods of `TorchInfluenceFunctionModel` implementations
[PR #487](https://github.com/aai-institute/pyDVL/pull/487)
- Implement new method: `EkfacInfluence`
[PR #476](https://github.com/aai-institute/pyDVL/pull/476)
- New notebook to showcase ekfac for LLMs
[PR #483](https://github.com/aai-institute/pyDVL/pull/483)
- Fixed bug with checking for converged values in semivalues
[PR #341](https://github.com/appliedAI-Initiative/pyDVL/pull/341)

## 0.8.0 - 🆕 New interfaces, scaling computation, bug fixes and improvements 🎁

Expand Down
15 changes: 11 additions & 4 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,12 @@ runs](#skipping-ci-runs)).
3. We split the tests based on their duration into groups and run them in parallel.

For that we use [pytest-split](https://jerry-git.github.io/pytest-split)
to first store the duration of all tests with `pytest --store-durations pytest --slow-tests`
to first store the duration of all tests with
`tox -e tests -- --store-durations --slow-tests`
in a `.test_durations` file.

Alternatively, we can use pytest directly:
`pytest --store-durations --slow-tests`.

> **Note** This does not have to be done each time a new test or test case
> is added. For new tests and test cases pytest-split assumes
Expand All @@ -359,11 +363,14 @@ runs](#skipping-ci-runs)).
Then we can have as many splits as we want:

```shell
pytest --splits 3 --group 1
pytest --splits 3 --group 2
pytest --splits 3 --group 3
tox -e tests -- --splits 3 --group 1
tox -e tests -- --splits 3 --group 2
tox -e tests -- --splits 3 --group 3
```

Alternatively, we can use pytest directly:
`pytest --splits 3 --group 1`.

Each one of these commands should be run in a separate shell/job
to run the test groups in parallel and decrease the total runtime.

Expand Down
2 changes: 1 addition & 1 deletion src/pydvl/utils/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
]

IndexT = TypeVar("IndexT", bound=np.int_)
NameT = TypeVar("NameT", bound=np.object_)
NameT = TypeVar("NameT", np.object_, np.int_)
R = TypeVar("R", covariant=True)
Seed = Union[int, Generator]

Expand Down
119 changes: 1 addition & 118 deletions src/pydvl/utils/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from pydvl.utils.score import Scorer
from pydvl.utils.types import SupervisedModel

__all__ = ["Utility", "DataUtilityLearning", "MinerGameUtility", "GlovesGameUtility"]
__all__ = ["Utility", "DataUtilityLearning"]

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -356,120 +356,3 @@ def __call__(self, indices: Iterable[int]) -> float:
def data(self) -> Dataset:
"""Returns the wrapped utility's [Dataset][pydvl.utils.dataset.Dataset]."""
return self.utility.data


class MinerGameUtility(Utility):
    r"""Toy cooperative game used for tests and demonstrations.

    A group of $n$ miners has found large bars of gold, and it takes two
    miners to carry one bar. The payoff of a coalition $S$ is therefore the
    number of complete pairs it contains:

    $${
    v(S) = \left\{\begin{array}{lll}
    \mid S \mid / 2 & \text{, if} & \mid S \mid \text{ is even} \\
    ( \mid S \mid - 1)/2 & \text{, if} & \mid S \mid \text{ is odd}
    \end{array}\right.
    }$$

    With more than two miners and an even number of them, the core consists
    of the single payoff in which every miner receives 1/2. With an odd
    number of miners the core is empty.

    Taken from [Wikipedia](https://en.wikipedia.org/wiki/Core_(game_theory))

    Args:
        n_miners: Number of miners that participate in the game.
            Must be greater than 2.
        **kwargs: Accepted but not used by this constructor.

    Raises:
        ValueError: If `n_miners` is not greater than 2.
    """

    def __init__(self, n_miners: int, **kwargs):
        if n_miners <= 2:
            raise ValueError(f"n_miners, {n_miners} should be > 2")
        self.n_miners = n_miners

        # One dummy sample per miner, as a column vector; the labels carry
        # no information and only exist to build a Dataset.
        features = np.arange(n_miners).reshape(-1, 1)
        labels = np.zeros_like(features)

        self.data = Dataset(
            x_train=features, y_train=labels, x_test=features, y_test=labels
        )

    def __call__(self, indices: Iterable[int]) -> float:
        """Returns the number of complete pairs in the coalition as a float."""
        coalition_size = sum(1 for _ in indices)
        # Dropping the parity bit removes the unpaired miner, if there is one.
        return (coalition_size - coalition_size % 2) / 2

    def _initialize_utility_wrapper(self):
        # Intentionally empty (presumably a hook of the parent Utility that
        # this closed-form game does not need; verify against the base class).
        pass

    def exact_least_core_values(self) -> Tuple[NDArray[np.float_], float]:
        """Returns the closed-form per-miner values and the subsidy."""
        n = self.n_miners
        if n % 2 == 0:
            share, subsidy = 0.5, 0.0
        else:
            share = subsidy = (n - 1) / (2 * n)
        return np.full(n, share), subsidy

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(n={self.n_miners})"


class GlovesGameUtility(Utility):
    r"""Toy game utility that is used for testing and demonstration purposes.

    In this game, some players have a left glove and others a right glove.
    Single gloves have a worth of zero while pairs have a worth of 1.

    The payoff of a coalition $S$ is:

    $${
    v(S) = \min( \mid S \cap L \mid, \mid S \cap R \mid )
    }$$

    Where $L$, respectively $R$, is the set of players with left gloves,
    respectively right gloves. Players are identified by their index:
    indices smaller than `left` hold a left glove, all others a right glove.

    Args:
        left: Number of players with a left glove.
        right: Number of players with a right glove.
        **kwargs: Accepted but not used by this constructor.
    """

    def __init__(self, left: int, right: int, **kwargs):
        self.left = left
        self.right = right

        # One dummy sample per player. The values are irrelevant for the
        # game, but they must be deterministic: np.empty would expose
        # uninitialised memory and make the dataset differ between runs.
        x = np.arange(left + right, dtype=float).reshape(-1, 1)
        # The y values don't matter here
        y = np.zeros_like(x)

        self.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)

    def __call__(self, indices: Iterable[int]) -> float:
        """Returns the number of complete glove pairs held by the coalition.

        Args:
            indices: Indices of the players in the coalition. Any iterable
                of integers is accepted (array, list, set, generator, ...).

        Returns:
            The smaller of the left-glove and right-glove counts, as a float.
        """
        # Materialise the iterable exactly once. np.asarray on a set or a
        # generator would create a 0-d object array and the comparison below
        # would fail; converting twice would also exhaust one-shot iterators.
        players = np.fromiter(indices, dtype=np.int_)
        n_left = int(np.count_nonzero(players < self.left))
        n_right = int(players.size) - n_left
        return float(min(n_left, n_right))

    def _initialize_utility_wrapper(self):
        # Intentionally empty (presumably a hook of the parent Utility that
        # this closed-form game does not need; verify against the base class).
        pass

    def exact_least_core_values(self) -> Tuple[NDArray[np.float_], float]:
        """Returns the closed-form per-player values and the subsidy.

        Returns:
            A tuple `(values, subsidy)` where `values` holds one entry per
            player, left-glove holders first.
        """
        if self.left == self.right:
            subsidy = -0.5
            values = np.array([0.5] * (self.left + self.right))
        elif self.left < self.right:
            # Left gloves are the scarce side and receive the whole value.
            subsidy = 0.0
            values = np.array([1.0] * self.left + [0.0] * self.right)
        else:
            # Right gloves are the scarce side and receive the whole value.
            subsidy = 0.0
            values = np.array([0.0] * self.left + [1.0] * self.right)
        return values, subsidy

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(L={self.left}, R={self.right})"
Loading

0 comments on commit f2f7466

Please sign in to comment.