
Cnn influence example #195

Merged: 31 commits, Dec 29, 2022

Changes from 6 commits

Commits (31)
93ec7f2
WIP influence example using imagenet and resnet
Xuzzo Nov 8, 2022
7b73a98
WIP extending example of cnn with ifs
Xuzzo Nov 9, 2022
17a45e9
WIP notebook
Xuzzo Nov 11, 2022
7990df1
WIP still working on damn example
Xuzzo Nov 11, 2022
b8df585
WIP writing docs for notebook
Xuzzo Nov 13, 2022
4800233
WIP writing theory in example
Xuzzo Nov 23, 2022
9c62032
Merge branch 'develop' into cnn_influence_example
Xuzzo Dec 6, 2022
7ada9a6
add theory appendix to imagenet notebook
Xuzzo Dec 6, 2022
1f36c05
moved methods to notebook support
Xuzzo Dec 6, 2022
c6c6da8
update to imagenet notebook - docs to notebook support methods
Xuzzo Dec 7, 2022
d203ad4
cosmetic changes to notebook
Xuzzo Dec 9, 2022
7d38b89
trying to solve tox issue in ci
Xuzzo Dec 9, 2022
b70432d
fix ci data loading in notebook
Xuzzo Dec 9, 2022
b16287e
minor changes to docs
Xuzzo Dec 9, 2022
bcc745a
update changelog and fix typing in notebook_support
Xuzzo Dec 12, 2022
cb18bdf
including progress bar
Xuzzo Dec 19, 2022
f371af8
addressing MR comments
Xuzzo Dec 20, 2022
0c7f466
dummy commit to re-trigger pipelines
Xuzzo Dec 21, 2022
d156f6e
minor changes to notebooks
Xuzzo Dec 21, 2022
eb63458
add sphinx hidden to cells
Xuzzo Dec 22, 2022
461eb27
minor changes to notebook
Xuzzo Dec 22, 2022
b4793f3
Forward calls to wrapped torch model
mdbenito Dec 27, 2022
f059b6b
Return ndarrays in TorchModel.fit
mdbenito Dec 27, 2022
d193eec
Types, strings, etc.
mdbenito Dec 27, 2022
826ae08
Remove clutter from notebook, some refactoring, rephrasing and tweaking.
mdbenito Dec 27, 2022
1e9e0a3
git ignore saved models
mdbenito Dec 27, 2022
c7256fc
Merge pull request #235 from appliedAI-Initiative/fix/cnn-influence
Xuzzo Dec 28, 2022
170970d
dummy commit to trigger pipeline
Xuzzo Dec 28, 2022
a31b263
fix typing
Xuzzo Dec 28, 2022
2d3a719
minor changes to notebooks
Xuzzo Dec 28, 2022
9f704d0
add req-notebooks and remove InternalDataset
Xuzzo Dec 28, 2022
822 changes: 822 additions & 0 deletions notebooks/influence_imagenet.ipynb

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions notebooks/influence_wine.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/pydvl/influence/conjugate_gradient.py
@@ -33,7 +33,7 @@ def conjugate_gradient(A: "NDArray", batch_y: "NDArray") -> "NDArray":
"""
batch_cg = []
for y in batch_y:
y_cg, _ = cg(A.T, y)
y_cg, _ = cg(A, y)
batch_cg.append(y_cg)
return np.asarray(batch_cg)

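For context, here is a minimal sketch of what this batched solve computes, assuming a symmetric positive-definite matrix (as a Hessian is near a local minimum), which is why dropping the transpose is correct. The names and data below are illustrative, not pydvl's API:

```python
import numpy as np
from scipy.sparse.linalg import cg

def batched_cg(A, batch_y):
    """Solve A x = y with conjugate gradients for each right-hand side y."""
    solutions = []
    for y in batch_y:
        y_cg, info = cg(A, y)  # A is symmetric, so no transpose is needed
        solutions.append(y_cg)
    return np.asarray(solutions)

rng = np.random.default_rng(0)
M = rng.normal(size=(5, 5))
A = M @ M.T + 5 * np.eye(5)            # symmetric positive definite
batch_y = rng.normal(size=(3, 5))
x = batched_cg(A, batch_y)
print(np.allclose(x @ A, batch_y, atol=1e-4))  # x @ A == (A @ x.T).T since A = A.T
```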
31 changes: 23 additions & 8 deletions src/pydvl/influence/frameworks/torch_differentiable.py
@@ -30,7 +30,7 @@ def flatten_gradient(grad):
"""
Simple function to flatten a pyTorch gradient for use in subsequent calculation
"""
return torch.cat([el.view(-1) for el in grad])
return torch.cat([el.reshape(-1) for el in grad])


class TorchTwiceDifferentiable(TwiceDifferentiable):
@@ -79,11 +79,18 @@ def split_grad(
x = torch.as_tensor(x)
y = torch.as_tensor(y)

params = [
param for param in self.model.parameters() if param.requires_grad == True
]

grads = [
flatten_gradient(
autograd.grad(
self.loss(torch.squeeze(self.model(x[i])), torch.squeeze(y[i])),
self.model.parameters(),
self.loss(
torch.squeeze(self.model(x[i].unsqueeze(0))),
torch.squeeze(y[i]),
),
params,
)
)
.detach()
@@ -110,10 +117,12 @@ def grad(
x = torch.as_tensor(x).requires_grad_(True)
y = torch.as_tensor(y)

params = [
param for param in self.model.parameters() if param.requires_grad == True
]

loss_value = self.loss(torch.squeeze(self.model(x)), torch.squeeze(y))
grad_f = torch.autograd.grad(
loss_value, self.model.parameters(), create_graph=True
)
grad_f = torch.autograd.grad(loss_value, params, create_graph=True)
return flatten_gradient(grad_f), x

def mvp(
@@ -130,21 +139,27 @@ def mvp(

:param grad_xy: an array [P] holding the gradients of the model parameters wrt input x and labels y, \
where P is the number of parameters of the model. It is typically obtained through self.grad.
:param v: A np.ndarray [DxP] which multiplies the Hessian, where D is the number of directions.
:param v: A np.ndarray [DxP] or a one dimensional np.array [D] which multiplies the Hessian, \
where D is the number of directions.
:param progress: True, iff progress shall be printed.
:param backprop_on: tensor used in the second backpropagation (the first one is along x and y as defined \
via grad_xy). If None, the model parameters are used.
:returns: A np.ndarray representing the implicit matrix vector product of the model along the given directions.\
Output shape is [DxP] if backprop_on is None, otherwise [DxM], with M the number of elements of backprop_on.
"""
v = torch.as_tensor(v)
if v.ndim == 1:
v = v.unsqueeze(0)

z = (grad_xy * Variable(v)).sum(dim=1)
params = [
param for param in self.model.parameters() if param.requires_grad == True
]
all_flattened_grads = [
flatten_gradient(
autograd.grad(
z[i],
self.model.parameters() if backprop_on is None else backprop_on,
params if backprop_on is None else backprop_on,
retain_graph=True,
)
)
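The recurring `requires_grad` filter introduced in these hunks matters when part of the network is frozen, as with the pre-trained ResNet in the new notebook: gradients and Hessian-vector products should only range over trainable parameters. Below is a self-contained sketch of the double-backpropagation pattern; the model and data are stand-ins, not pydvl code:

```python
import torch
from torch import nn, autograd

def flatten_gradient(grad):
    return torch.cat([el.reshape(-1) for el in grad])

model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
for p in model[0].parameters():   # freeze the first layer, like a pre-trained
    p.requires_grad = False       # feature extractor

params = [p for p in model.parameters() if p.requires_grad]
x, y = torch.randn(16, 4), torch.randn(16, 1)
loss = nn.functional.mse_loss(model(x), y)

# First backward pass: gradient of the loss w.r.t. the trainable parameters only.
grad_xy = flatten_gradient(autograd.grad(loss, params, create_graph=True))
# Second backward pass: Hessian-vector product along a direction v.
v = torch.randn_like(grad_xy)
hvp = flatten_gradient(
    autograd.grad((grad_xy * v).sum(), params, retain_graph=True)
)
print(hvp.shape)  # one entry per trainable parameter
```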
9 changes: 5 additions & 4 deletions src/pydvl/influence/general.py
@@ -5,6 +5,7 @@
from typing import TYPE_CHECKING, Callable, Dict, Optional

import numpy as np
from scipy.sparse.linalg import LinearOperator

from .conjugate_gradient import (
batched_preconditioned_conjugate_gradient,
@@ -119,13 +120,13 @@ def _calculate_influences_pert(
"""
all_pert_influences = []
for i in np.arange(len(x)):
grad_xy, tensor_x = model.grad(x[i], y[i])
grad_xy, tensor_x = model.grad(x[i : i + 1], y[i])
perturbation_influences = model.mvp(
grad_xy,
influence_factors,
backprop_on=[tensor_x],
backprop_on=tensor_x,
)
all_pert_influences.append(perturbation_influences)
all_pert_influences.append(perturbation_influences.reshape((-1, *x[i].shape)))

return np.stack(all_pert_influences, axis=1)

@@ -189,7 +190,7 @@ def compute_influences(
n_params = differentiable_model.num_params()
dict_fact_algos: Dict[Optional[str], MatrixVectorProductInversionAlgorithm] = {
"direct": lambda hvp, x: np.linalg.solve(hvp(np.eye(n_params)), x.T).T, # type: ignore
"cg": lambda hvp, x: conjugate_gradient(hvp(np.eye(n_params)), x), # type: ignore
"cg": lambda hvp, x: conjugate_gradient(LinearOperator((n_params, n_params), matvec=hvp), x), # type: ignore
"batched_cg": lambda hvp, x: batched_preconditioned_conjugate_gradient( # type: ignore
hvp, x, **inversion_method_kwargs
)[
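The change to the "cg" entry avoids materializing the Hessian: instead of evaluating `hvp(np.eye(n_params))`, which needs O(P^2) memory and P Hessian-vector products up front, the hvp is wrapped in a `scipy.sparse.linalg.LinearOperator` that conjugate gradients queries one matrix-vector product at a time. A small illustration under assumed names, where the explicit matrix `H` only stands in for the implicit Hessian:

```python
import numpy as np
from scipy.sparse.linalg import LinearOperator, cg

n_params = 6
rng = np.random.default_rng(1)
M = rng.normal(size=(n_params, n_params))
H = M @ M.T + np.eye(n_params)   # stand-in for the (never materialized) Hessian

def hvp(v):
    # In pydvl this product comes from backpropagation; H never exists as a matrix.
    return H @ v

H_op = LinearOperator((n_params, n_params), matvec=hvp)
b = rng.normal(size=n_params)
x, info = cg(H_op, b)
print(info == 0, np.allclose(H @ x, b, atol=1e-4))
```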
2 changes: 1 addition & 1 deletion src/pydvl/influence/model_wrappers/__init__.py
@@ -3,5 +3,5 @@
__all__ = [
"TorchLinearRegression",
"TorchBinaryLogisticRegression",
"TorchNeuralNetwork",
"TorchMLP",
]
30 changes: 22 additions & 8 deletions src/pydvl/influence/model_wrappers/torch_wrappers.py
@@ -3,7 +3,7 @@
three are defined explicitly.
"""
import logging
from abc import ABC
from abc import ABC, abstractmethod
from typing import Any, Callable, List, Optional, Tuple, Union

import numpy as np
@@ -23,7 +23,8 @@
__all__ = [
"TorchLinearRegression",
"TorchBinaryLogisticRegression",
"TorchNeuralNetwork",
"TorchMLP",
"TorchModel",
]

logger = logging.getLogger(__name__)
@@ -47,11 +48,12 @@ def __getitem__(self, idx):
return self.x[idx], self.y[idx]


class TorchModel(ABC):
class TorchModelBase(ABC):
def __init__(self):
if not _TORCH_INSTALLED:
raise RuntimeWarning("This function requires PyTorch.")

@abstractmethod
def forward(self, x: torch.Tensor) -> torch.Tensor:
pass

@@ -106,8 +108,12 @@ def fit(
if scheduler:
scheduler.step()
pred_val = self.forward(x_val)
val_loss.append(loss(torch.squeeze(pred_val), torch.squeeze(y_val)).item())
train_loss.append(np.mean(batch_loss))
epoch_val_loss = loss(torch.squeeze(pred_val), torch.squeeze(y_val)).item()
mean_epoch_train_loss = np.mean(batch_loss)
val_loss.append(epoch_val_loss)
train_loss.append(mean_epoch_train_loss)
logger.info(f"Epoch: {epoch} ---> Training loss: {mean_epoch_train_loss}")
logger.info(f"Epoch: {epoch} ---> Validation loss: {epoch_val_loss}")
return train_loss, val_loss

def predict(self, x: torch.Tensor) -> np.ndarray:
@@ -133,7 +139,15 @@ def score(
return score(self.forward(x), y).detach().numpy() # type: ignore


class TorchLinearRegression(nn.Module, TorchModel):
class TorchModel(TorchModelBase):
def __init__(self, model: nn.Module):
self.model = model

def forward(self, x: torch.Tensor) -> torch.Tensor:
return self.model(x)


class TorchLinearRegression(nn.Module, TorchModelBase):
"""
A simple linear regression model (with bias) f(x)=Ax+b.
"""
@@ -172,7 +186,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
return x @ self.A.T + self.b


class TorchBinaryLogisticRegression(nn.Module, TorchModel):
class TorchBinaryLogisticRegression(nn.Module, TorchModelBase):
"""
A simple binary logistic regression model p(y)=sigmoid(dot(a, x) + b).
"""
Expand Down Expand Up @@ -203,7 +217,7 @@ def forward(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
return torch.sigmoid(x @ self.A.T + self.b)


class TorchNeuralNetwork(nn.Module, TorchModel):
class TorchMLP(nn.Module, TorchModelBase):
"""
A simple fully-connected neural network f(x) model defined by y = v_K, v_i = o(A v_(i-1) + b), v_1 = x. It contains
K layers and K - 2 hidden layers. It holds that K >= 2, because every network contains an input and an output layer.
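The new `TorchModel` class simply forwards calls to an arbitrary wrapped `nn.Module`, so pre-trained networks (the notebook wraps a torchvision ResNet) can reuse the `fit`/`predict`/`score` machinery of `TorchModelBase` without subclassing. A hedged sketch of the pattern, with a simplified stand-in class and an illustrative ResNet:

```python
import torch
from torch import nn
from torchvision.models import resnet18

class TorchModel:  # simplified stand-in for the wrapper added in this PR
    def __init__(self, model: nn.Module):
        self.model = model

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.model(x)  # forward calls to the wrapped torch model

wrapped = TorchModel(resnet18(num_classes=10))
logits = wrapped.forward(torch.randn(2, 3, 64, 64))  # tiny-imagenet-sized input
print(logits.shape)  # torch.Size([2, 10])
```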
58 changes: 57 additions & 1 deletion src/pydvl/utils/dataset.py
@@ -17,6 +17,7 @@

"""

import logging
from collections import OrderedDict
from pathlib import Path
from typing import Any, Callable, Iterable, List, Optional, Sequence, Tuple, Union
@@ -29,6 +30,7 @@
from sklearn.utils import Bunch, check_X_y

__all__ = ["Dataset", "GroupedDataset", "load_spotify_dataset", "load_wine_dataset"]
logger = logging.getLogger(__name__)


class Dataset:
@@ -421,7 +423,7 @@ def load_wine_dataset(
:param train_size: fraction of points used for training dataset
:param test_size: fraction of points used for test dataset
:param random_state: fix random seed. If None, no random seed is set.
:returns: A tuple of four elements with the first three being input and
:return: A tuple of four elements with the first three being input and
target values in the form of matrices of shape (N,D) the first
and (N,) the second. The fourth element is a list containing names of
features of the model. (FIXME doc)
@@ -465,6 +467,60 @@
)


def load_preprocess_imagenet(
train_size: float,
test_size: float,
downsample_ds_to_fraction: float = 1,
keep_labels: Optional[List] = None,
random_state: Optional[int] = None,
is_CI: bool = False,
):
try:
from datasets import load_dataset
from torchvision import transforms
except ImportError as e:
raise RuntimeError(
"PyTorch, Torchvision and datasets are required to load and "
"process the imagenet dataset."
) from e

preprocess_rgb = transforms.Compose(
[
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.225, 0.225, 0.225]),
]
)

def _process_dataset(ds):
processed_ds = {"normalized_images": [], "labels": [], "images": []}
for i, item in enumerate(ds):
if item["image"].mode == "RGB":
processed_ds["normalized_images"].append(preprocess_rgb(item["image"]))
processed_ds["images"].append(item["image"])
processed_ds["labels"].append(item["label"])
return pd.DataFrame.from_dict(processed_ds)

tiny_imagenet = load_dataset("Maysee/tiny-imagenet", split="train")
if downsample_ds_to_fraction != 1:
tiny_imagenet = tiny_imagenet.shard(1 / downsample_ds_to_fraction, 0)
if keep_labels is not None:
tiny_imagenet = tiny_imagenet.filter(lambda item: item["label"] in keep_labels)

split_ds = tiny_imagenet.train_test_split(
train_size=1 - test_size,
seed=random_state,
)
test_ds = _process_dataset(split_ds["test"])

split_ds = split_ds["train"].train_test_split(
train_size=train_size,
seed=random_state,
)
train_ds = _process_dataset(split_ds["train"])
val_ds = _process_dataset(split_ds["test"])
return train_ds, val_ds, test_ds


def synthetic_classification_dataset(
mus: np.ndarray,
sigma: float,
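A hedged usage sketch of the new loader, based only on the signature and return values visible in this diff: it fetches `Maysee/tiny-imagenet` from the Hugging Face hub and returns three pandas DataFrames with `normalized_images`, `labels` and raw `images` columns. All parameter values below are illustrative:

```python
from pydvl.utils.dataset import load_preprocess_imagenet

train_ds, val_ds, test_ds = load_preprocess_imagenet(
    train_size=0.8,                 # fraction of the non-test data used for training
    test_size=0.1,                  # fraction held out as the test split
    downsample_ds_to_fraction=0.2,  # shard the dataset to keep the example fast
    keep_labels=[10, 20],           # restrict to two classes, e.g. for a binary task
    random_state=16,
)
print(len(train_ds), train_ds.columns.tolist())
# -> ['normalized_images', 'labels', 'images']
```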
4 changes: 2 additions & 2 deletions tests/influence/test_influences.py
@@ -16,7 +16,7 @@
influences_perturbation_linear_regression_analytical,
influences_up_linear_regression_analytical,
)
from pydvl.influence.model_wrappers import TorchLinearRegression, TorchNeuralNetwork
from pydvl.influence.model_wrappers import TorchLinearRegression, TorchMLP
from pydvl.utils.dataset import load_wine_dataset
except ImportError:
pass
@@ -311,7 +311,7 @@ def test_influences_with_neural_network_explicit_hessian():
num_classes = len(unique_classes)
num_epochs = 300
network_size = [16, 16]
nn = TorchNeuralNetwork(feature_dimension, num_classes, network_size)
nn = TorchMLP(feature_dimension, num_classes, network_size)
optimizer = Adam(params=nn.parameters(), lr=0.001, weight_decay=0.001)
loss = F.cross_entropy
nn.fit(