diff --git a/.test_durations b/.test_durations
index bf283f1a9..7a7768311 100644
--- a/.test_durations
+++ b/.test_durations
@@ -1,4 +1,74 @@
 {
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv1d_nn_pert]": 2.59026943400022,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv1d_nn_up]": 2.7703545530002884,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv1d_no_grad_up]": 0.8260756999989098,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv2d_nn_pert]": 1.101015895999808,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv2d_nn_up]": 1.206421760000012,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv3d_nn_pert]": 1.4294998579989624,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[conv3d_nn_up]": 1.3345100419992377,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[simple_nn_class_up]": 3.361096810000163,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[simple_nn_pert]": 0.6431655560008949,
+    "tests/influence/test_influence_calculator.py::test_dask_ekfac_influence[simple_nn_up]": 0.7108467549987836,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_pert-arnoldi]": 1.4143697240015172,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_pert-cg]": 2.522983850998571,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_pert-direct]": 1.3974800130017684,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_up-arnoldi]": 1.4222584220005956,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_up-cg]": 2.5742563249987143,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_nn_up-direct]": 1.3653277730008995,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_no_grad_up-arnoldi]": 0.48600830500072334,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_no_grad_up-cg]": 0.7124692380002671,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv1d_no_grad_up-direct]": 0.47575023000172223,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_pert-arnoldi]": 0.8454596849987865,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_pert-cg]": 1.7426123529985489,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_pert-direct]": 0.808057442000063,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_up-arnoldi]": 0.8408936979994905,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_up-cg]": 1.8711466349977854,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv2d_nn_up-direct]": 0.7968461060008849,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_pert-arnoldi]": 1.041476223997961,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_pert-cg]": 2.6348945509980695,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_pert-direct]": 1.0208977649999724,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_up-arnoldi]": 1.3290127370019036,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_up-cg]": 5.805227180999282,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[conv3d_nn_up-direct]": 1.8304335940010787,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_class_up-arnoldi]": 1.9109577300005185,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_class_up-cg]": 4.174298836998787,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_class_up-direct]": 1.5329143839990138,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_pert-arnoldi]": 0.4525704900006531,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_pert-cg]": 0.8970914879992051,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_pert-direct]": 0.46585072099878744,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_up-arnoldi]": 0.4456351110020478,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_up-cg]": 1.0693235140006436,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_factors[simple_nn_up-direct]": 0.473094435999883,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv1d_nn_pert]": 2.9761773999980505,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv1d_nn_up]": 4.120701600999382,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv1d_no_grad_up]": 1.3337201610011107,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv2d_nn_pert]": 2.1662617799993313,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv2d_nn_up]": 3.132741712999632,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv3d_nn_pert]": 2.958187670999905,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[conv3d_nn_up]": 29.53393912699903,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[simple_nn_class_up]": 3.257567571998152,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[simple_nn_pert]": 1.361139677999745,
+    "tests/influence/test_influence_calculator.py::test_dask_influence_nn[simple_nn_up]": 1.261350679998941,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv1d_nn_pert]": 2.6579838110010314,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv1d_nn_up]": 2.6499502710012166,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv1d_no_grad_up]": 0.8881425300005503,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv2d_nn_pert]": 1.463408392999554,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv2d_nn_up]": 1.4602782740003022,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv3d_nn_pert]": 1.7320480180023878,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[conv3d_nn_up]": 1.5744405670029664,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[simple_nn_class_up]": 4.504372877998321,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[simple_nn_pert]": 0.8736393959989073,
+    "tests/influence/test_influence_calculator.py::test_sequential_calculator[simple_nn_up]": 0.8922971840001992,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv1d_nn_pert]": 2.381483594999736,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv1d_nn_up]": 2.314768557000207,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv1d_no_grad_up]": 0.7438636890019552,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv2d_nn_pert]": 0.9980942529964523,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv2d_nn_up]": 1.1705565329993988,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv3d_nn_pert]": 1.2230443010012095,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[conv3d_nn_up]": 4.6594328910014156,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[simple_nn_class_up]": 3.0931850600009057,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[simple_nn_pert]": 0.7189972920023138,
+    "tests/influence/test_influence_calculator.py::test_thread_safety_violation_error[simple_nn_up]": 0.7615732119993481,
     "tests/influence/test_influences.py::test_influence_linear_model[cg-train_set_size_200-perturbation]": 0.8664472580130678,
     "tests/influence/test_influences.py::test_influence_linear_model[cg-train_set_size_200-up]": 0.18988716599415056,
     "tests/influence/test_influences.py::test_influence_linear_model[direct-train_set_size_200-perturbation]": 0.66577532098745,
@@ -78,61 +148,230 @@
     "tests/influence/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data3-8-160-1e-05]": 4.422049004002474,
     "tests/influence/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data4-4-250-1e-05]": 9.08382142597111,
     "tests/influence/test_util.py::test_lanzcos_low_rank_hessian_approx_exception": 0.0035210640053264797,
-    "tests/test_plugin.py::test_failure": 0.001304317032918334,
-    "tests/test_plugin.py::test_fixture_call_no_arguments": 0.0014436830242630094,
-    "tests/test_plugin.py::test_fixture_only[1]": 0.0011941569682676345,
-    "tests/test_plugin.py::test_fixture_only[2]": 0.0013037140015512705,
-    "tests/test_plugin.py::test_marker_and_fixture[1]": 0.0011783259978983551,
-    "tests/test_plugin.py::test_marker_and_fixture[2]": 0.001276884024264291,
-    "tests/test_plugin.py::test_marker_ignore_exception[0]": 0.0011224850022699684,
-    "tests/test_plugin.py::test_marker_ignore_exception[1]": 0.0009688139834906906,
-    "tests/test_plugin.py::test_marker_ignore_exception[2]": 0.0011277040175627917,
-    "tests/test_plugin.py::test_marker_ignore_exception[3]": 0.001226628024596721,
-    "tests/test_plugin.py::test_marker_ignore_exception[4]": 0.0010670160118024796,
-    "tests/test_plugin.py::test_marker_only[0]": 0.0027732179732993245,
-    "tests/test_plugin.py::test_marker_only_with_data_fixture[0]": 0.0012184199877083302,
-    "tests/test_plugin.py::test_marker_only_with_data_fixture[1]": 0.0014672029938083142,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data0-4-avg]": 0.20045989400023245,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data0-4-full]": 0.06902083099885203,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data1-5-avg]": 0.5016348780009139,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data1-5-full]": 0.1801713530021516,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data2-10-avg]": 0.1689359069987404,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data2-10-full]": 0.06361526499858883,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data3-8-avg]": 0.23464886900001147,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data3-8-full]": 0.08587454999906186,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data4-4-avg]": 3.4049244679990807,
+    "tests/influence/torch/test_functional.py::test_get_hessian[model_data4-4-full]": 0.8161465619996306,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data0-4-avg-no_precomputed_grad]": 0.07086462199913512,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data0-4-avg-precomputed_grad]": 0.05853749299967603,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data0-4-full-no_precomputed_grad]": 0.042555562999041285,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data0-4-full-precomputed_grad]": 0.04254312700140872,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data1-5-avg-no_precomputed_grad]": 0.12398883400055638,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data1-5-avg-precomputed_grad]": 0.09578048600087641,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data1-5-full-no_precomputed_grad]": 0.08458008900015557,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data1-5-full-precomputed_grad]": 0.08726069599833863,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data2-10-avg-no_precomputed_grad]": 0.04802275299880421,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data2-10-avg-precomputed_grad]": 0.03521194200220634,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data2-10-full-no_precomputed_grad]": 0.036551941999277915,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data2-10-full-precomputed_grad]": 0.037776481000037165,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data3-8-avg-no_precomputed_grad]": 0.07563198299976648,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data3-8-avg-precomputed_grad]": 0.06490736200066749,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data3-8-full-no_precomputed_grad]": 0.04830970400143997,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data3-8-full-precomputed_grad]": 0.046601254001870984,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data4-4-avg-no_precomputed_grad]": 0.6670472200003132,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data4-4-avg-precomputed_grad]": 0.5045342149987846,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data4-4-full-no_precomputed_grad]": 0.2970590019995143,
+    "tests/influence/torch/test_functional.py::test_get_hvp_function[model_data4-4-full-precomputed_grad]": 0.3075855399983993,
+    "tests/influence/torch/test_functional.py::test_hvp[model_data0-1e-05]": 0.021658439001839724,
+    "tests/influence/torch/test_functional.py::test_hvp[model_data1-1e-05]": 0.059265748001053,
+    "tests/influence/torch/test_functional.py::test_hvp[model_data2-1e-05]": 0.01743878900015261,
+    "tests/influence/torch/test_functional.py::test_hvp[model_data3-1e-05]": 0.02271863699934329,
+    "tests/influence/torch/test_functional.py::test_hvp[model_data4-1e-05]": 0.03280089899999439,
+    "tests/influence/torch/test_functional.py::test_matrix_jacobian_product[100-5-110]": 0.022210784001799766,
+    "tests/influence/torch/test_functional.py::test_matrix_jacobian_product[25-10-500]": 0.020637996000004932,
+    "tests/influence/torch/test_functional.py::test_matrix_jacobian_product[46-1-632]": 0.01833421299852489,
+    "tests/influence/torch/test_functional.py::test_matrix_jacobian_product[50-3-120]": 0.01751421799781383,
+    "tests/influence/torch/test_functional.py::test_mixed_derivatives[100-5-512]": 1.7785364399987884,
+    "tests/influence/torch/test_functional.py::test_mixed_derivatives[25-10-734]": 0.27127136799936125,
+    "tests/influence/torch/test_functional.py::test_mixed_derivatives[46-1-1000]": 0.18510219099880487,
+    "tests/influence/torch/test_functional.py::test_mixed_derivatives[50-3-100]": 0.055743695000273874,
+    "tests/influence/torch/test_functional.py::test_per_sample_gradient[100-5-120]": 0.022160912998515414,
+    "tests/influence/torch/test_functional.py::test_per_sample_gradient[25-10-550]": 0.0378074430009292,
+    "tests/influence/torch/test_functional.py::test_per_sample_gradient[46-6-632]": 0.033401361002688645,
+    "tests/influence/torch/test_functional.py::test_per_sample_gradient[50-3-120]": 0.02004740899974422,
+    "tests/influence/torch/test_influence_model.py::test_influence_linear_model[cg-train_set_size_200-perturbation]": 4.1002855009992345,
+    "tests/influence/torch/test_influence_model.py::test_influence_linear_model[cg-train_set_size_200-up]": 4.092100218998894,
+    "tests/influence/torch/test_influence_model.py::test_influence_linear_model[direct-train_set_size_200-perturbation]": 0.5856196849999833,
+    "tests/influence/torch/test_influence_model.py::test_influence_linear_model[direct-train_set_size_200-up]": 0.1793102950014145,
+    "tests/influence/torch/test_influence_model.py::test_influence_linear_model[lissa-train_set_size_200-perturbation]": 74.52067036900007,
+    "tests/influence/torch/test_influence_model.py::test_influence_linear_model[lissa-train_set_size_200-up]": 72.80234433299847,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv1d_nn_pert]": 2.6213616719978745,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv1d_nn_up]": 2.9271264809995046,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv1d_no_grad_up]": 1.1280039110006328,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv2d_nn_pert]": 16.078887900001064,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv2d_nn_up]": 16.092805495001812,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv3d_nn_pert]": 5.826150597002197,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[conv3d_nn_up]": 5.808433192996745,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[simple_nn_class_up]": 3.4398634410008526,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[simple_nn_pert]": 1.783800326000346,
+    "tests/influence/torch/test_influence_model.py::test_influences_arnoldi[simple_nn_up]": 1.5235134640006436,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv1d_nn_pert]": 2.470179049998478,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv1d_nn_up]": 2.3925959699990926,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv1d_no_grad_up]": 0.7791441699991992,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv2d_nn_pert]": 1.128924710001229,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv2d_nn_up]": 1.256267286998991,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv3d_nn_pert]": 1.2739636030000838,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[conv3d_nn_up]": 1.2143029310009297,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[simple_nn_class_up]": 3.279752685999483,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[simple_nn_pert]": 0.7043358350001654,
+    "tests/influence/torch/test_influence_model.py::test_influences_ekfac[simple_nn_up]": 0.7221000240006106,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_nn_pert-cg]": 2.666355408999152,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_nn_pert-lissa]": 3.536100011000599,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_nn_up-cg]": 2.8996486520009057,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_nn_up-lissa]": 3.648799233000318,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_no_grad_up-cg]": 0.846027261000927,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv1d_no_grad_up-lissa]": 1.46926116800023,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv2d_nn_pert-cg]": 1.322623816999112,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv2d_nn_pert-lissa]": 2.4566458920016885,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv2d_nn_up-cg]": 1.3719535260006523,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv2d_nn_up-lissa]": 2.3520564940008626,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv3d_nn_pert-cg]": 1.4215319300001283,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv3d_nn_pert-lissa]": 2.5365598410025996,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv3d_nn_up-cg]": 1.4816708039998048,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[conv3d_nn_up-lissa]": 2.420441305001077,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_class_up-cg]": 3.5962213779985177,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_class_up-lissa]": 4.116930427000625,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_pert-cg]": 0.8423471179994522,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_pert-lissa]": 1.8240221239993843,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_up-cg]": 0.8927097550003964,
+    "tests/influence/torch/test_influence_model.py::test_influences_nn[simple_nn_up-lissa]": 1.7715864019992296,
+    "tests/influence/torch/test_util.py::test_align_structure_error[source0-target0]": 0.008872623999195639,
+    "tests/influence/torch/test_util.py::test_align_structure_error[source1-target1]": 0.008990501999505796,
+    "tests/influence/torch/test_util.py::test_align_structure_error[source2-unsupported]": 0.008625348000350641,
+    "tests/influence/torch/test_util.py::test_align_structure_success[source0-target0]": 0.009682354999313247,
+    "tests/influence/torch/test_util.py::test_align_structure_success[source1-target1]": 0.011076738001065678,
+    "tests/influence/torch/test_util.py::test_align_structure_success[source2-target2]": 0.008422474998951657,
+    "tests/influence/torch/test_util.py::test_batch_hvp[model_data0-1e-05]": 0.018433343999276985,
+    "tests/influence/torch/test_util.py::test_batch_hvp[model_data1-1e-05]": 0.04329261199745815,
+    "tests/influence/torch/test_util.py::test_batch_hvp[model_data2-1e-05]": 0.021779085998787195,
+    "tests/influence/torch/test_util.py::test_batch_hvp[model_data3-1e-05]": 0.02447877800113929,
+    "tests/influence/torch/test_util.py::test_batch_hvp[model_data4-1e-05]": 0.027325978000590112,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data0-4-avg]": 0.05654373300058069,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data0-4-full]": 0.048235695001494605,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data1-5-avg]": 0.10194805900027859,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data1-5-full]": 0.07934144700084289,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data2-10-avg]": 0.04320316699704563,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data2-10-full]": 0.03360124099890527,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data3-8-avg]": 0.062039004000325804,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data3-8-full]": 0.039968260998648475,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data4-4-avg]": 0.5075304600013624,
+    "tests/influence/torch/test_util.py::test_get_hvp_function[model_data4-4-full]": 0.29033965000235185,
+    "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data0-4-200-0.0001]": 6.130291282001053,
+    "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data1-5-70-0.001]": 7.575732932000392,
+    "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data2-10-50-0.0001]": 5.145587835002516,
+    "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data3-8-160-1e-05]": 9.05795658399984,
+    "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx[model_data4-4-250-1e-05]": 15.930120687000453,
+    "tests/influence/torch/test_util.py::test_lanzcos_low_rank_hessian_approx_exception": 0.010992516999976942,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-30-5]": 0.3716939040004945,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-30-6]": 0.3245709369984979,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-45-5]": 0.4916222280025977,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-45-6]": 0.44272739400003047,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-50-5]": 11.764691698001116,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions0-50-6]": 6.499053524999908,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-30-5]": 0.3747018210015085,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-30-6]": 0.33021277699845086,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-45-5]": 0.5002059710004687,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-45-6]": 0.46272212300027604,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-50-5]": 0.5805674699986412,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions1-50-6]": 0.5113370569997642,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-30-5]": 0.44749919500100077,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-30-6]": 0.37910716500118724,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-45-5]": 0.5593350939998345,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-45-6]": 0.47198495200063917,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-50-5]": 0.5512238980008988,
+    "tests/influence/torch/test_util.py::test_torch_dataset_to_dask_array[tailing_dimensions2-50-6]": 0.5188143759987724,
+    "tests/test_plugin.py::test_failure": 0.003285975997641799,
+    "tests/test_plugin.py::test_fixture_call_no_arguments": 0.0033647860000201035,
+    "tests/test_plugin.py::test_fixture_only[1]": 0.0034970750002685236,
+    "tests/test_plugin.py::test_fixture_only[2]": 0.0034195480002381373,
+    "tests/test_plugin.py::test_marker_and_fixture[1]": 0.0034149520015489543,
+    "tests/test_plugin.py::test_marker_and_fixture[2]": 0.003513548002956668,
+    "tests/test_plugin.py::test_marker_ignore_exception[0]": 0.003384523999557132,
+    "tests/test_plugin.py::test_marker_ignore_exception[1]": 0.00331666099918948,
+    "tests/test_plugin.py::test_marker_ignore_exception[2]": 0.00323147399831214,
+    "tests/test_plugin.py::test_marker_ignore_exception[3]": 0.0032334910029021557,
+    "tests/test_plugin.py::test_marker_ignore_exception[4]": 0.003230911001082859,
+    "tests/test_plugin.py::test_marker_only[0]": 0.006288947000939515,
+    "tests/test_plugin.py::test_marker_only_with_data_fixture[0]": 0.003481858999293763,
+    "tests/test_plugin.py::test_marker_only_with_data_fixture[1]": 0.003525184998579789,
     "tests/test_plugin.py::test_marker_only_with_data_fixture[2]": 0.0012167239910922945,
-    "tests/test_results.py::test_adding_different_indices[indices_10-names_10-values_10-indices_20-names_20-values_20-expected_indices0-expected_names0-expected_values0]": 0.0020641259907279164,
-    "tests/test_results.py::test_adding_different_indices[indices_11-names_11-values_11-indices_21-names_21-values_21-expected_indices1-expected_names1-expected_values1]": 0.002675808995263651,
-    "tests/test_results.py::test_adding_different_indices[indices_12-names_12-values_12-indices_22-names_22-values_22-expected_indices2-expected_names2-expected_values2]": 0.002674269024282694,
-    "tests/test_results.py::test_adding_different_indices[indices_13-names_13-values_13-indices_23-names_23-values_23-expected_indices3-expected_names3-expected_values3]": 0.0020707659714389592,
-    "tests/test_results.py::test_adding_random": 0.0034820580040104687,
-    "tests/test_results.py::test_dataframe_sorting[values0-names0-ranks_asc0]": 0.0029723149491474032,
-    "tests/test_results.py::test_dataframe_sorting[values1-names1-ranks_asc1]": 0.002218269946752116,
-    "tests/test_results.py::test_empty[0]": 0.0012037760170642287,
-    "tests/test_results.py::test_empty[5]": 0.001365817035548389,
-    "tests/test_results.py::test_empty_deprecation": 0.0013571020099334419,
-    "tests/test_results.py::test_equality[values0-names0]": 0.0021291770099196583,
-    "tests/test_results.py::test_equality[values1-names1]": 0.0016342299932148308,
-    "tests/test_results.py::test_extra_values[extra_values0]": 0.001437259983504191,
-    "tests/test_results.py::test_extra_values[extra_values1]": 0.0015066640044096857,
-    "tests/test_results.py::test_from_random_creation[-1.0-10]": 0.0015409209881909192,
-    "tests/test_results.py::test_from_random_creation[-1.0-1]": 0.0014630080258939415,
-    "tests/test_results.py::test_from_random_creation[1.0-10]": 0.0012284110125619918,
-    "tests/test_results.py::test_from_random_creation[1.0-1]": 0.0013108189741615206,
-    "tests/test_results.py::test_from_random_creation[None-10]": 0.0012196720344945788,
-    "tests/test_results.py::test_from_random_creation[None-1]": 0.0015253000019583851,
-    "tests/test_results.py::test_from_random_creation_errors": 0.0009378239628858864,
-    "tests/test_results.py::test_get_idx": 0.0010275309905409813,
-    "tests/test_results.py::test_indexing[values0-names0-ranks_asc0]": 0.0014630479854531586,
-    "tests/test_results.py::test_indexing[values1-names1-ranks_asc1]": 0.001598447997821495,
-    "tests/test_results.py::test_iter[values0-names0-ranks_asc0]": 0.0013525879476219416,
-    "tests/test_results.py::test_iter[values1-names1-ranks_asc1]": 0.0014122460270300508,
-    "tests/test_results.py::test_names[data_names0]": 0.0015603950014337897,
-    "tests/test_results.py::test_serialization[values0-None-dumps-loads0]": 0.001649087033001706,
-    "tests/test_results.py::test_serialization[values0-None-dumps-loads1]": 0.0016458219906780869,
-    "tests/test_results.py::test_serialization[values1-None-dumps-loads0]": 0.0015400749980472028,
-    "tests/test_results.py::test_serialization[values1-None-dumps-loads1]": 0.0019450989784672856,
-    "tests/test_results.py::test_sorting[values0-names0-ranks_asc0]": 0.0016402129840571433,
-    "tests/test_results.py::test_sorting[values1-names1-ranks_asc1]": 0.0016363860049750656,
-    "tests/test_results.py::test_todataframe[values0-names0-ranks_asc0]": 0.0023001570079941303,
-    "tests/test_results.py::test_todataframe[values1-names1-ranks_asc1]": 0.002222412033006549,
-    "tests/test_results.py::test_types[indices0-int32-data_names0-<U1]": 0.0014417339698411524,
-    "tests/test_results.py::test_types[indices1-int64-data_names1-int64]": 0.0016339810099452734,
-    "tests/test_results.py::test_types[indices2-int32-data_names2-float64]": 0.0016496109892614186,
-    "tests/test_results.py::test_updating": 0.0011235670244786888,
+    "tests/test_results.py::test_adding_different_indices[indices_10-names_10-values_10-indices_20-names_20-values_20-expected_indices0-expected_names0-expected_values0]": 0.005255071999272332,
+    "tests/test_results.py::test_adding_different_indices[indices_11-names_11-values_11-indices_21-names_21-values_21-expected_indices1-expected_names1-expected_values1]": 0.005423882998002227,
+    "tests/test_results.py::test_adding_different_indices[indices_12-names_12-values_12-indices_22-names_22-values_22-expected_indices2-expected_names2-expected_values2]": 0.0054176030007511145,
+    "tests/test_results.py::test_adding_different_indices[indices_13-names_13-values_13-indices_23-names_23-values_23-expected_indices3-expected_names3-expected_values3]": 0.005320759000824182,
+    "tests/test_results.py::test_adding_random": 0.008090201999948476,
+    "tests/test_results.py::test_dataframe_sorting[values0-names0-ranks_asc0]": 0.008680627001012908,
+    "tests/test_results.py::test_dataframe_sorting[values1-names1-ranks_asc1]": 0.006279957000515424,
+    "tests/test_results.py::test_empty[0]": 0.003641559000243433,
+    "tests/test_results.py::test_empty[5]": 0.0034929009998450056,
+    "tests/test_results.py::test_empty_deprecation": 0.0035087529995507794,
+    "tests/test_results.py::test_equality[values0-names0]": 0.004204769002171815,
+    "tests/test_results.py::test_equality[values1-names1]": 0.004355032000603387,
+    "tests/test_results.py::test_extra_values[extra_values0]": 0.004352300002210541,
+    "tests/test_results.py::test_extra_values[extra_values1]": 0.004183073002423043,
+    "tests/test_results.py::test_from_random_creation[-1.0-10]": 0.003700102000948391,
+    "tests/test_results.py::test_from_random_creation[-1.0-1]": 0.0037578669980575796,
+    "tests/test_results.py::test_from_random_creation[1.0-10]": 0.0037663590010197368,
+    "tests/test_results.py::test_from_random_creation[1.0-1]": 0.0038107559994386975,
+    "tests/test_results.py::test_from_random_creation[None-10]": 0.0036299099992902484,
+    "tests/test_results.py::test_from_random_creation[None-1]": 0.0037064750013087178,
+    "tests/test_results.py::test_from_random_creation_errors": 0.00302240999735659,
+    "tests/test_results.py::test_get_idx": 0.0032395419984823093,
+    "tests/test_results.py::test_indexing[values0-names0-ranks_asc0]": 0.004395261999889044,
+    "tests/test_results.py::test_indexing[values1-names1-ranks_asc1]": 0.004297610999856261,
+    "tests/test_results.py::test_iter[values0-names0-ranks_asc0]": 0.003994380002040998,
+    "tests/test_results.py::test_iter[values1-names1-ranks_asc1]": 0.004105043000890873,
+    "tests/test_results.py::test_names[data_names0]": 0.003977623999162461,
+    "tests/test_results.py::test_serialization[values0-None-dumps-loads0]": 0.004757332000735914,
+    "tests/test_results.py::test_serialization[values0-None-dumps-loads1]": 0.004781503001140663,
+    "tests/test_results.py::test_serialization[values1-None-dumps-loads0]": 0.004697013004260953,
+    "tests/test_results.py::test_serialization[values1-None-dumps-loads1]": 0.004567677997329156,
+    "tests/test_results.py::test_sorting[values0-names0-ranks_asc0]": 0.004288738000468584,
+    "tests/test_results.py::test_sorting[values1-names1-ranks_asc1]": 0.00483136099865078,
+    "tests/test_results.py::test_todataframe[values0-names0-ranks_asc0]": 0.0060278359978838125,
+    "tests/test_results.py::test_todataframe[values1-names1-ranks_asc1]": 0.006075087001590873,
+    "tests/test_results.py::test_types[indices0-int32-data_names0-<U1]": 0.004259749999619089,
+    "tests/test_results.py::test_types[indices1-int64-data_names1-int64]": 0.00433944100223016,
+    "tests/test_results.py::test_types[indices2-int32-data_names2-float64]": 0.004848469001444755,
+    "tests/test_results.py::test_updating": 0.003732475999640883,
+    "tests/test_results.py::test_updating_order_invariance": 0.03923124100037967,
+    "tests/utils/test_caching.py::test_cache_backend_serialization[disk]": 0.008889401000487851,
+    "tests/utils/test_caching.py::test_cache_backend_serialization[in-memory]": 0.007174410997322411,
+    "tests/utils/test_caching.py::test_cache_backend_serialization[memcached]": 0.020350722999864956,
+    "tests/utils/test_caching.py::test_cache_ignore_args[disk]": 0.00907280900173646,
+    "tests/utils/test_caching.py::test_cache_ignore_args[in-memory]": 0.00826339699960954,
+    "tests/utils/test_caching.py::test_cache_ignore_args[memcached]": 0.011436692000643234,
+    "tests/utils/test_caching.py::test_cache_time_threshold[disk]": 0.011453412000264507,
+    "tests/utils/test_caching.py::test_cache_time_threshold[in-memory]": 0.010395613997388864,
+    "tests/utils/test_caching.py::test_cache_time_threshold[memcached]": 0.015287686999727157,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args10-args20-True]": 0.008108977001029416,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args11-args21-True]": 0.00850183300099161,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args12-args22-True]": 0.007250777001900133,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args13-args23-True]": 0.007955973998832633,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args14-args24-True]": 0.008939844999986235,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args15-args25-True]": 0.010985593999066623,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args16-args26-True]": 0.009679172000687686,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args17-args27-False]": 0.007626642000104766,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args18-args28-False]": 0.008609937000073842,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments[args19-args29-False]": 0.008109001000775606,
+    "tests/utils/test_caching.py::test_cached_func_hash_arguments_of_method": 0.008681645000251592,
+    "tests/utils/test_caching.py::test_cached_func_hash_function[<lambda>-<lambda>-True]": 0.013553835999118746,
+    "tests/utils/test_caching.py::test_cached_func_hash_function[foo-<lambda>-False]": 0.009608976999516017,
+    "tests/utils/test_caching.py::test_cached_func_hash_function[foo-foo-True]": 0.007844682999348151,
+    "tests/utils/test_caching.py::test_cached_func_hash_function[foo-foo_duplicate-True]": 0.00921746999847528,
+    "tests/utils/test_caching.py::test_cached_func_hash_function[foo-foo_with_random-False]": 0.006780997000532807,
+    "tests/utils/test_caching.py::test_cached_func_hash_function[foo_with_random-foo_with_random_and_sleep-False]": 0.008397087000048487,
     "tests/utils/test_caching.py::test_failed_connection": 0.0039788429858163,
+    "tests/utils/test_caching.py::test_faster_with_repeated_training[disk]": 5.499508081999011,
+    "tests/utils/test_caching.py::test_faster_with_repeated_training[in-memory]": 5.596929604998877,
+    "tests/utils/test_caching.py::test_faster_with_repeated_training[memcached]": 6.545152930997574,
+    "tests/utils/test_caching.py::test_memcached_failed_connection": 0.009630470000047353,
     "tests/utils/test_caching.py::test_memcached_faster_with_repeated_training": 5.003239913989091,
     "tests/utils/test_caching.py::test_memcached_parallel_jobs[joblib]": 3.1677759810409043,
     "tests/utils/test_caching.py::test_memcached_parallel_jobs[ray-external]": 38.430890925985295,
@@ -151,303 +390,439 @@
     "tests/utils/test_caching.py::test_memcached_parallel_repeated_training[ray-local-20-2-20-10]": 0.007027510990155861,
     "tests/utils/test_caching.py::test_memcached_repeated_training": 2.3077823049970903,
     "tests/utils/test_caching.py::test_memcached_single_job": 0.007132280006771907,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.1]": 0.009810923977056518,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.5]": 0.0023630280047655106,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.8]": 0.002483188029145822,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.1-kwargs0]": 0.0022864479979034513,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.1-kwargs1]": 0.001960736990440637,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.5-kwargs0]": 0.0018571619875729084,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.5-kwargs1]": 0.0019256969972047955,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.8-kwargs0]": 0.0020103229908272624,
-    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.8-kwargs1]": 0.001870437990874052,
-    "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.1]": 0.004145220998907462,
-    "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.5]": 0.002273507008794695,
-    "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.8]": 0.0025340290158055723,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.1]": 0.002445343037834391,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.5]": 0.002387374988757074,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.8]": 0.0025074610312003642,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.1]": 0.0031885300122667104,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.5]": 0.0018069100042339414,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.8]": 0.0019649149908218533,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.1-kwargs0]": 0.002473844971973449,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.1-kwargs1]": 0.0024133779807016253,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.5-kwargs0]": 0.0023138070246204734,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.5-kwargs1]": 0.002177672984544188,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.8-kwargs0]": 0.0030658979958388954,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.8-kwargs1]": 0.002469450992066413,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.1]": 0.0016314840177074075,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.5]": 0.0017394520109519362,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.8]": 0.0017109749896917492,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.1]": 0.003284825972514227,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.5]": 0.0038210980128496885,
-    "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.8]": 0.0023955479555297643,
-    "tests/utils/test_dataset.py::test_grouped_dataset_results": 0.00312941602896899,
-    "tests/utils/test_numeric.py::test_powerset": 0.002356015960685909,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[0-2-ValueError]": 0.0011365640093572438,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[1-2-ValueError]": 0.0010459299955982715,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[10-1-ValueError]": 0.0011281229672022164,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[10-2-None]": 0.001765107037499547,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[2-10-None]": 0.001528021995909512,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[4--2-ValueError]": 0.0011659429874271154,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[7-23-None]": 0.001419320033164695,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[10-2]": 0.001462101994547993,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[2-10]": 0.001395261992001906,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[7-23]": 0.001416039012838155,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[10-2]": 0.0015627649845555425,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[2-10]": 0.0014263579796534032,
-    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[7-23]": 0.0016800050216261297,
-    "tests/utils/test_numeric.py::test_random_powerset[0-1]": 0.0012409990013111383,
-    "tests/utils/test_numeric.py::test_random_powerset[1-10]": 0.0014637470012530684,
-    "tests/utils/test_numeric.py::test_random_powerset[10-1024]": 0.0079122620227281,
-    "tests/utils/test_numeric.py::test_random_powerset[5-128]": 0.0020825770043302327,
-    "tests/utils/test_numeric.py::test_random_powerset_label_min[0-10-3-1000]": 0.11152737599331886,
-    "tests/utils/test_numeric.py::test_random_powerset_label_min[1-10-3-1000]": 0.11375491399667226,
-    "tests/utils/test_numeric.py::test_random_powerset_label_min[2-10-3-1000]": 0.11396494103246368,
-    "tests/utils/test_numeric.py::test_random_powerset_reproducible[10-1024]": 0.013066521001746878,
-    "tests/utils/test_numeric.py::test_random_powerset_stochastic[10-1024]": 0.012338358006672934,
-    "tests/utils/test_numeric.py::test_random_subset_of_size[0-0-None]": 0.0015464180323760957,
-    "tests/utils/test_numeric.py::test_random_subset_of_size[0-1-ValueError]": 0.001127758005168289,
-    "tests/utils/test_numeric.py::test_random_subset_of_size[10-0-None]": 0.0013323969906195998,
-    "tests/utils/test_numeric.py::test_random_subset_of_size[10-3-None]": 0.0015970039821695536,
-    "tests/utils/test_numeric.py::test_random_subset_of_size[1000-40-None]": 0.001427212991984561,
-    "tests/utils/test_numeric.py::test_random_subset_of_size_stochastic[10-3]": 0.001142591005191207,
-    "tests/utils/test_numeric.py::test_random_subset_of_size_stochastic[1000-40]": 0.0012538870214484632,
-    "tests/utils/test_numeric.py::test_running_moments": 0.35335890398710035,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data0-3-expected_chunks0]": 0.0042906299931928515,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data1-2-expected_chunks1]": 0.004308464995119721,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data2-2-expected_chunks2]": 0.004244079987984151,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data3-3-expected_chunks3]": 0.004028873983770609,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data4-5-expected_chunks4]": 0.004101024009287357,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data5-42-expected_chunks5]": 0.004789252998307347,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data6-42-expected_chunks6]": 0.004256373038515449,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data7-4-expected_chunks7]": 0.004143773025134578,
-    "tests/utils/test_parallel.py::test_chunkification[joblib-data8-4-expected_chunks8]": 0.0040604640380479395,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data0-3-expected_chunks0]": 0.0060307729872874916,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data1-2-expected_chunks1]": 0.005929058010224253,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data2-2-expected_chunks2]": 0.009121662005782127,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data3-3-expected_chunks3]": 0.009956339985365048,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data4-5-expected_chunks4]": 0.010149178997380659,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data5-42-expected_chunks5]": 0.010347278992412612,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data6-42-expected_chunks6]": 0.010047424992080778,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data7-4-expected_chunks7]": 0.008645244990475476,
-    "tests/utils/test_parallel.py::test_chunkification[ray-external-data8-4-expected_chunks8]": 0.009245932975318283,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data0-3-expected_chunks0]": 0.0045589170476887375,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data1-2-expected_chunks1]": 0.004910157964332029,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data2-2-expected_chunks2]": 0.004910080024274066,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data3-3-expected_chunks3]": 0.0059317940031178296,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data4-5-expected_chunks4]": 0.008992511982796714,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data5-42-expected_chunks5]": 0.008223566022934392,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data6-42-expected_chunks6]": 0.007052068045595661,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data7-4-expected_chunks7]": 0.004718763986602426,
-    "tests/utils/test_parallel.py::test_chunkification[ray-local-data8-4-expected_chunks8]": 0.005322564014932141,
-    "tests/utils/test_parallel.py::test_effective_n_jobs[joblib]": 0.0014253620174713433,
-    "tests/utils/test_parallel.py::test_effective_n_jobs[ray-external]": 3.978927739954088,
-    "tests/utils/test_parallel.py::test_effective_n_jobs[ray-local]": 4.104055134986993,
-    "tests/utils/test_parallel.py::test_future_cancellation[joblib]": 0.005014022986870259,
-    "tests/utils/test_parallel.py::test_future_cancellation[ray-external]": 1.9293224809807725,
-    "tests/utils/test_parallel.py::test_future_cancellation[ray-local]": 0.07703918303013779,
-    "tests/utils/test_parallel.py::test_futures_executor_map[joblib]": 1.5601177359640133,
-    "tests/utils/test_parallel.py::test_futures_executor_map[ray-external]": 0.09417001300607808,
-    "tests/utils/test_parallel.py::test_futures_executor_map[ray-local]": 0.09271710200118832,
-    "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[joblib]": 0.007176648010499775,
-    "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[ray-external]": 1.090440120024141,
-    "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[ray-local]": 1.095393077004701,
-    "tests/utils/test_parallel.py::test_futures_executor_submit[joblib]": 1.8566069509834051,
-    "tests/utils/test_parallel.py::test_futures_executor_submit[ray-external]": 0.04992300402955152,
-    "tests/utils/test_parallel.py::test_futures_executor_submit[ray-local]": 0.048481280013220385,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices0-expected0]": 0.0015987549850251526,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices1-expected1]": 0.001547530002426356,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices2-expected2]": 0.001560483971843496,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-numpy-indices4-45]": 0.00178057502489537,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-range-indices3-expected3]": 0.0015469170466531068,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices0-expected0]": 0.0018091480305884033,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices1-expected1]": 0.01276223495369777,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices2-expected2]": 0.012882986018666998,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-numpy-indices4-45]": 0.01399321696953848,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-range-indices3-expected3]": 0.012885421980172396,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices0-expected0]": 0.15361307095736265,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices1-expected1]": 0.8156346119940281,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices2-expected2]": 1.3068530370073859,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-numpy-indices4-45]": 0.01750938399345614,
-    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-range-indices3-expected3]": 0.017205809010192752,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices0-expected0]": 0.0029827099933754653,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices1-expected1]": 0.0027304230316076428,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices2-expected2]": 0.0026203590095974505,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-numpy-indices4-45]": 0.003456770005868748,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-range-indices3-expected3]": 0.0027074709651060402,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices0-expected0]": 0.8282912400027271,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices1-expected1]": 2.2837093910493422,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices2-expected2]": 2.4645657170040067,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-numpy-indices4-45]": 2.281004316988401,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-range-indices3-expected3]": 2.393285626982106,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices0-expected0]": 1.903353853005683,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices1-expected1]": 2.947957994969329,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices2-expected2]": 3.211508878011955,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-numpy-indices4-45]": 3.3349247129808646,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-range-indices3-expected3]": 3.599037625041092,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices0-expected0]": 0.016201907012145966,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices1-expected1]": 0.013995222019730136,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices2-expected2]": 0.013650566979777068,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-numpy-indices4-45]": 0.013722714997129515,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-range-indices3-expected3]": 0.013983122975332662,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices0-expected0]": 1.5035187809844501,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices1-expected1]": 2.235937710967846,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices2-expected2]": 2.1283504489983898,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-numpy-indices4-45]": 2.0944344620220363,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-range-indices3-expected3]": 2.104675643990049,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices0-expected0]": 1.7145587989652995,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices1-expected1]": 2.772829012013972,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices2-expected2]": 3.1254515810287558,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-numpy-indices4-45]": 3.4023931239789817,
-    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-range-indices3-expected3]": 3.7103631219943054,
-    "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[joblib]": 0.01629631401738152,
-    "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[ray-external]": 3.550109267991502,
-    "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[ray-local]": 3.186494815017795,
-    "tests/utils/test_parallel.py::test_map_reduce_seeding[joblib-42-12]": 0.05403909899177961,
-    "tests/utils/test_parallel.py::test_map_reduce_seeding[ray-external-42-12]": 9.918427228025394,
-    "tests/utils/test_parallel.py::test_map_reduce_seeding[ray-local-42-12]": 9.834357938991161,
-    "tests/utils/test_parallel.py::test_wrap_function[joblib]": 0.0031614619656465948,
-    "tests/utils/test_parallel.py::test_wrap_function[ray-external]": 3.1981390729779378,
-    "tests/utils/test_parallel.py::test_wrap_function[ray-local]": 3.2998613989911973,
-    "tests/utils/test_score.py::test_compose_score": 0.0027295449981465936,
-    "tests/utils/test_score.py::test_scorer": 0.0051104900194332,
-    "tests/utils/test_score.py::test_squashed_r2": 0.001943372975802049,
-    "tests/utils/test_score.py::test_squashed_variance": 0.001487176021328196,
-    "tests/utils/test_status.py::test_and_status": 0.001112824014853686,
-    "tests/utils/test_status.py::test_not_status": 0.0010235870140604675,
-    "tests/utils/test_status.py::test_or_status": 0.0009352969937026501,
+    "tests/utils/test_caching.py::test_parallel_jobs[joblib-disk]": 0.008082594998995773,
+    "tests/utils/test_caching.py::test_parallel_jobs[joblib-in-memory]": 0.007858986000428558,
+    "tests/utils/test_caching.py::test_parallel_jobs[joblib-memcached]": 5.864486223999847,
+    "tests/utils/test_caching.py::test_parallel_jobs[ray-external-disk]": 0.019337756999448175,
+    "tests/utils/test_caching.py::test_parallel_jobs[ray-external-in-memory]": 3.8737009590004163,
+    "tests/utils/test_caching.py::test_parallel_jobs[ray-external-memcached]": 0.010424148002130096,
+    "tests/utils/test_caching.py::test_parallel_jobs[ray-local-disk]": 0.006320855998637853,
+    "tests/utils/test_caching.py::test_parallel_jobs[ray-local-in-memory]": 0.007159704999139649,
+    "tests/utils/test_caching.py::test_parallel_jobs[ray-local-memcached]": 0.010268650999933016,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-disk-20-1-10-5]": 0.040544517996750074,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-disk-20-1-20-10]": 0.041609834999690065,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-disk-20-2-10-5]": 0.450297680001313,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-disk-20-2-20-10]": 0.41885778900177684,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-in-memory-20-1-10-5]": 0.04637932000150613,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-in-memory-20-1-20-10]": 0.038561840998227126,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-in-memory-20-2-10-5]": 4.16153838199898,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-in-memory-20-2-20-10]": 0.47474137900280766,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-memcached-20-1-10-5]": 0.03560425399882661,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-memcached-20-1-20-10]": 0.04425754300064,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-memcached-20-2-10-5]": 0.46746473100029107,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[joblib-memcached-20-2-20-10]": 0.47426626100059366,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-disk-20-1-10-5]": 0.019769640000959043,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-disk-20-1-20-10]": 0.02465987799951108,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-disk-20-2-10-5]": 0.012952293998750974,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-disk-20-2-20-10]": 0.010107056999913766,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-in-memory-20-1-10-5]": 0.013676337999640964,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-in-memory-20-1-20-10]": 0.009283014000175172,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-in-memory-20-2-10-5]": 0.014747097000508802,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-in-memory-20-2-20-10]": 0.012189770999611937,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-memcached-20-1-10-5]": 0.014756809001482907,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-memcached-20-1-20-10]": 0.014543373998094467,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-memcached-20-2-10-5]": 0.018690378999963286,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-external-memcached-20-2-20-10]": 0.017414769001334207,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-disk-20-1-10-5]": 0.00978782600031991,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-disk-20-1-20-10]": 0.008025870998608298,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-disk-20-2-10-5]": 0.00932121699952404,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-disk-20-2-20-10]": 0.012999636999666109,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-in-memory-20-1-10-5]": 0.010384335999333416,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-in-memory-20-1-20-10]": 0.007256282997332164,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-in-memory-20-2-10-5]": 0.007955910998134641,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-in-memory-20-2-20-10]": 0.006997692000368261,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-memcached-20-1-10-5]": 0.008193191000827937,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-memcached-20-1-20-10]": 0.010128158999577863,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-memcached-20-2-10-5]": 0.013161438002498471,
+    "tests/utils/test_caching.py::test_parallel_repeated_training[ray-local-memcached-20-2-20-10]": 0.009240641998985666,
+    "tests/utils/test_caching.py::test_repeated_training[disk]": 1.2679626049975923,
+    "tests/utils/test_caching.py::test_repeated_training[in-memory]": 0.8953080740011501,
+    "tests/utils/test_caching.py::test_repeated_training[memcached]": 1.0694843190012762,
+    "tests/utils/test_caching.py::test_single_job[disk]": 0.013467190003211726,
+    "tests/utils/test_caching.py::test_single_job[in-memory]": 0.007606943001519539,
+    "tests/utils/test_caching.py::test_single_job[memcached]": 0.012050191000525956,
+    "tests/utils/test_caching.py::test_without_pymemcache": 0.0068226680014049634,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.1]": 0.020587041000908357,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.5]": 0.00390724699900602,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_sklearn[0.8]": 0.00450960899979691,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.1-kwargs0]": 0.0038753029984945897,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.1-kwargs1]": 0.0036959019998903386,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.5-kwargs0]": 0.0038168650007719407,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.5-kwargs1]": 0.0037348340010794345,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.8-kwargs0]": 0.003279165001004003,
+    "tests/utils/test_dataset.py::test_creating_dataset_from_x_y_arrays[0.8-kwargs1]": 0.003160262998790131,
+    "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.1]": 0.00434540000060224,
+    "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.5]": 0.004031194001072436,
+    "tests/utils/test_dataset.py::test_creating_dataset_subsclassfrom_sklearn[0.8]": 0.0037131489989405964,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.1]": 0.0038164179986779345,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.5]": 0.005253569997876184,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn[0.8]": 0.005844020997756161,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.1]": 0.003921200999684515,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.5]": 0.0038101809986983426,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_sklearn_failure[0.8]": 0.00419950299874472,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.1-kwargs0]": 0.0037465159985003993,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.1-kwargs1]": 0.0037122550002095522,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.5-kwargs0]": 0.003727491999597987,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.5-kwargs1]": 0.00471100999857299,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.8-kwargs0]": 0.004182996997769806,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays[0.8-kwargs1]": 0.004742823000924545,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.1]": 0.0034745570010272786,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.5]": 0.0029412200001388555,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_from_x_y_arrays_failure[0.8]": 0.003702544998304802,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.1]": 0.004099161000340246,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.5]": 0.004149230999246356,
+    "tests/utils/test_dataset.py::test_creating_grouped_dataset_subsclassfrom_sklearn[0.8]": 0.004327725000621285,
+    "tests/utils/test_dataset.py::test_grouped_dataset_results": 0.005107523998958641,
+    "tests/utils/test_numeric.py::test_powerset": 0.003924966000340646,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[0-2-ValueError]": 0.003189409999322379,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[1-2-ValueError]": 0.0027116169985674787,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[10-1-ValueError]": 0.0027666500009218,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[10-2-None]": 0.0031559840026602615,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[2-10-None]": 0.004578909001793363,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[4--2-ValueError]": 0.0026738769993244205,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number[7-23-None]": 0.0031755019990669098,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[10-2]": 0.0032696249982109293,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[2-10]": 0.0026570699992589653,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_reproducible[7-23]": 0.004022232000352233,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[10-2]": 0.004431671000929782,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[2-10]": 0.003217298000890878,
+    "tests/utils/test_numeric.py::test_random_matrix_with_condition_number_stochastic[7-23]": 0.002955772999484907,
+    "tests/utils/test_numeric.py::test_random_powerset[0-1]": 0.003080959002545569,
+    "tests/utils/test_numeric.py::test_random_powerset[1-10]": 0.0025249310019717086,
+    "tests/utils/test_numeric.py::test_random_powerset[10-1024]": 0.012735986998450244,
+    "tests/utils/test_numeric.py::test_random_powerset[5-128]": 0.0035692419987753965,
+    "tests/utils/test_numeric.py::test_random_powerset_label_min[0-10-3-1000]": 0.171625541001049,
+    "tests/utils/test_numeric.py::test_random_powerset_label_min[1-10-3-1000]": 0.17619158500019694,
+    "tests/utils/test_numeric.py::test_random_powerset_label_min[2-10-3-1000]": 0.1760632390014507,
+    "tests/utils/test_numeric.py::test_random_powerset_reproducible[10-1024]": 0.018217550998087972,
+    "tests/utils/test_numeric.py::test_random_powerset_stochastic[10-1024]": 0.018896675997893908,
+    "tests/utils/test_numeric.py::test_random_subset_of_size[0-0-None]": 0.002780025000902242,
+    "tests/utils/test_numeric.py::test_random_subset_of_size[0-1-ValueError]": 0.0032452249997731997,
+    "tests/utils/test_numeric.py::test_random_subset_of_size[10-0-None]": 0.0033025680004357127,
+    "tests/utils/test_numeric.py::test_random_subset_of_size[10-3-None]": 0.002845983000952401,
+    "tests/utils/test_numeric.py::test_random_subset_of_size[1000-40-None]": 0.0032918939996307017,
+    "tests/utils/test_numeric.py::test_random_subset_of_size_stochastic[10-3]": 0.002797532002659864,
+    "tests/utils/test_numeric.py::test_random_subset_of_size_stochastic[1000-40]": 0.0036268280000513187,
+    "tests/utils/test_numeric.py::test_running_moments": 0.6145333489985205,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data0-3-expected_chunks0]": 0.015510658000494004,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data1-2-expected_chunks1]": 0.012093620000086958,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data2-2-expected_chunks2]": 0.011375399999451474,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data3-3-expected_chunks3]": 0.016111063001517323,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data4-5-expected_chunks4]": 0.02149817300050927,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data5-42-expected_chunks5]": 0.013197087000662577,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data6-42-expected_chunks6]": 0.017662769996604766,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data7-4-expected_chunks7]": 0.013664767000591382,
+    "tests/utils/test_parallel.py::test_chunkification[joblib-data8-4-expected_chunks8]": 0.0129568249994918,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data0-3-expected_chunks0]": 0.02873299299972132,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data1-2-expected_chunks1]": 0.037400651001007645,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data2-2-expected_chunks2]": 0.04821507099950395,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data3-3-expected_chunks3]": 0.03959165199921699,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data4-5-expected_chunks4]": 0.030608711000240874,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data5-42-expected_chunks5]": 0.026263547000780818,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data6-42-expected_chunks6]": 0.01923054399958346,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data7-4-expected_chunks7]": 0.020033368999065715,
+    "tests/utils/test_parallel.py::test_chunkification[ray-external-data8-4-expected_chunks8]": 0.019113988000754034,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data0-3-expected_chunks0]": 0.022260648998781107,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data1-2-expected_chunks1]": 0.02477619599994796,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data2-2-expected_chunks2]": 0.037821603000338655,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data3-3-expected_chunks3]": 0.0276968880007189,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data4-5-expected_chunks4]": 0.03822717000184639,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data5-42-expected_chunks5]": 0.03200487200047064,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data6-42-expected_chunks6]": 0.02251517100012279,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data7-4-expected_chunks7]": 0.02549016900047718,
+    "tests/utils/test_parallel.py::test_chunkification[ray-local-data8-4-expected_chunks8]": 0.016007507998438086,
+    "tests/utils/test_parallel.py::test_effective_n_jobs[joblib]": 0.005121522000990808,
+    "tests/utils/test_parallel.py::test_effective_n_jobs[ray-external]": 4.8416320709984575,
+    "tests/utils/test_parallel.py::test_effective_n_jobs[ray-local]": 6.68878685799973,
+    "tests/utils/test_parallel.py::test_future_cancellation[joblib]": 0.013322050999704516,
+    "tests/utils/test_parallel.py::test_future_cancellation[ray-external]": 6.1742852379975375,
+    "tests/utils/test_parallel.py::test_future_cancellation[ray-local]": 5.196579726998607,
+    "tests/utils/test_parallel.py::test_futures_executor_map[joblib]": 2.7167825960004848,
+    "tests/utils/test_parallel.py::test_futures_executor_map[ray-external]": 0.10519307000140543,
+    "tests/utils/test_parallel.py::test_futures_executor_map[ray-local]": 0.10775902599925757,
+    "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[joblib]": 0.012954608999280026,
+    "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[ray-external]": 1.1045504180019634,
+    "tests/utils/test_parallel.py::test_futures_executor_map_with_max_workers[ray-local]": 1.100314563000211,
+    "tests/utils/test_parallel.py::test_futures_executor_submit[joblib]": 3.2937196319981012,
+    "tests/utils/test_parallel.py::test_futures_executor_submit[ray-external]": 0.06437306899897521,
+    "tests/utils/test_parallel.py::test_futures_executor_submit[ray-local]": 0.05545763400186843,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices0-expected0]": 0.0033702880009514047,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices1-expected1]": 0.003624205000960501,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-list-indices2-expected2]": 0.0034593179989315104,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-numpy-indices4-45]": 0.003431146000366425,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-1-range-indices3-expected3]": 0.003291076000095927,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices0-expected0]": 0.0043230089995631715,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices1-expected1]": 0.014759305000552558,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-list-indices2-expected2]": 0.014669898000647663,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-numpy-indices4-45]": 0.014518962998408824,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-2-range-indices3-expected3]": 0.014446292998400168,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices0-expected0]": 0.16248785400057386,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices1-expected1]": 2.277719737998268,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-list-indices2-expected2]": 3.347688416000892,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-numpy-indices4-45]": 0.04604001000188873,
+    "tests/utils/test_parallel.py::test_map_reduce_job[joblib-4-range-indices3-expected3]": 0.057255595000242465,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices0-expected0]": 0.026082702997882734,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices1-expected1]": 0.023299047999898903,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-list-indices2-expected2]": 0.02191418300026271,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-numpy-indices4-45]": 0.02673473200047738,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-1-range-indices3-expected3]": 0.027526039999429486,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices0-expected0]": 3.4228467769989948,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices1-expected1]": 4.798353305001001,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-list-indices2-expected2]": 4.636959622999711,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-numpy-indices4-45]": 4.028821964997405,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-2-range-indices3-expected3]": 4.398552747999929,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices0-expected0]": 3.734075545000451,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices1-expected1]": 5.287959784998748,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-list-indices2-expected2]": 6.245923890002814,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-numpy-indices4-45]": 6.61028953999994,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-external-4-range-indices3-expected3]": 6.340780258999075,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices0-expected0]": 0.026392571999167558,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices1-expected1]": 0.0228169030015124,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-list-indices2-expected2]": 0.026224847002595197,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-numpy-indices4-45]": 0.02119264299835777,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-1-range-indices3-expected3]": 0.02678771700084326,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices0-expected0]": 2.813331847997688,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices1-expected1]": 4.129950463000569,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-list-indices2-expected2]": 4.1853057150001405,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-numpy-indices4-45]": 3.9139689650000946,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-2-range-indices3-expected3]": 4.066097430000809,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices0-expected0]": 3.626414754000507,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices1-expected1]": 5.354816800998378,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-list-indices2-expected2]": 6.589774920001219,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-numpy-indices4-45]": 6.373054822000995,
+    "tests/utils/test_parallel.py::test_map_reduce_job[ray-local-4-range-indices3-expected3]": 6.71076984499814,
+    "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[joblib]": 0.03710782099915377,
+    "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[ray-external]": 6.3739082100000815,
+    "tests/utils/test_parallel.py::test_map_reduce_job_partial_map_and_reduce_func[ray-local]": 6.171818285998597,
+    "tests/utils/test_parallel.py::test_map_reduce_seeding[joblib-42-12]": 0.16202725999937684,
+    "tests/utils/test_parallel.py::test_map_reduce_seeding[ray-external-42-12]": 19.644846438999593,
+    "tests/utils/test_parallel.py::test_map_reduce_seeding[ray-local-42-12]": 19.494929903998127,
+    "tests/utils/test_parallel.py::test_wrap_function[joblib]": 0.010273419000441208,
+    "tests/utils/test_parallel.py::test_wrap_function[ray-external]": 4.3178896000026725,
+    "tests/utils/test_parallel.py::test_wrap_function[ray-local]": 4.386876819999088,
+    "tests/utils/test_score.py::test_compose_score": 0.003188072001648834,
+    "tests/utils/test_score.py::test_scorer": 0.006043704999683541,
+    "tests/utils/test_score.py::test_squashed_r2": 0.00286291600241384,
+    "tests/utils/test_score.py::test_squashed_variance": 0.002624727998409071,
+    "tests/utils/test_status.py::test_and_status": 0.0023914820012578275,
+    "tests/utils/test_status.py::test_not_status": 0.0024304439994011773,
+    "tests/utils/test_status.py::test_or_status": 0.0031306429991673212,
     "tests/utils/test_utility.py::test_cache[2-0-8]": 0.00677607100806199,
-    "tests/utils/test_utility.py::test_data_utility_learning_wrapper[10-2-0-8]": 0.004311377968406305,
-    "tests/utils/test_utility.py::test_data_utility_learning_wrapper[2-2-0-8]": 0.0040499519964214414,
+    "tests/utils/test_utility.py::test_data_utility_learning_wrapper[10-2-0-8]": 0.007143015998735791,
+    "tests/utils/test_utility.py::test_data_utility_learning_wrapper[2-2-0-8]": 0.008842511999318958,
     "tests/utils/test_utility.py::test_different_cache_signature[model_kwargs0-2-0-8]": 0.0038117940130177885,
     "tests/utils/test_utility.py::test_different_cache_signature[model_kwargs1-2-0-8]": 0.0034867670328821987,
-    "tests/utils/test_utility.py::test_utility_show_warnings[4-4-False]": 0.00734079402172938,
-    "tests/utils/test_utility.py::test_utility_show_warnings[4-4-True]": 0.007422954018693417,
+    "tests/utils/test_utility.py::test_different_utility_with_same_cache[2-0-8]": 0.00974041799963743,
+    "tests/utils/test_utility.py::test_utility_serialization[False-2-0-8]": 0.004299543001252459,
+    "tests/utils/test_utility.py::test_utility_serialization[True-2-0-8]": 0.004922002000967041,
+    "tests/utils/test_utility.py::test_utility_show_warnings[4-4-False]": 0.018307410000488744,
+    "tests/utils/test_utility.py::test_utility_show_warnings[4-4-True]": 0.009923514002366574,
+    "tests/utils/test_utility.py::test_utility_with_cache[2-0-8]": 0.010860190002858872,
+    "tests/value/least_core/test_common.py::test_lc_solve_problems[test_game0]": 6.664896995000163,
     "tests/value/least_core/test_common.py::test_lc_solve_problems[test_utility0]": 3.0655845460132696,
+    "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False--1-test_game0-0.1-128]": 0.10254659299971536,
+    "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False--1-test_game1-0.2-10000]": 0.95324419499957,
     "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False--1-test_utility0-0.1-128]": 0.05090764199849218,
     "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False--1-test_utility1-0.2-10000]": 0.39550038598827086,
+    "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False-1-test_game0-0.1-128]": 0.11610117799864383,
+    "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False-1-test_game1-0.2-10000]": 1.9240173660018627,
     "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False-1-test_utility0-0.1-128]": 0.054777625045971945,
     "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[False-1-test_utility1-0.2-10000]": 0.7125970929628238,
+    "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True--1-test_game0-0.1-128]": 12.35835815199971,
+    "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True--1-test_game1-0.2-10000]": 1.27118392400007,
     "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True--1-test_utility0-0.1-128]": 6.515727574034827,
     "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True--1-test_utility1-0.2-10000]": 0.6112625639943872,
+    "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True-1-test_game0-0.1-128]": 0.13497778700002527,
+    "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True-1-test_game1-0.2-10000]": 2.054010283000025,
     "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True-1-test_utility0-0.1-128]": 0.07473104700329714,
     "tests/value/least_core/test_montecarlo.py::test_montecarlo_least_core[True-1-test_utility1-0.2-10000]": 0.7888634809933137,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game0]": 0.056533884000600665,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game1]": 0.05103961900022114,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game2]": 0.04528383999968355,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game3]": 0.04622581199873821,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_game4]": 0.04450138000174775,
     "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility0]": 0.024124946998199448,
     "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility1]": 0.02425819096970372,
     "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility2]": 0.023533977015176788,
     "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility3]": 0.023558928980492055,
     "tests/value/least_core/test_naive.py::test_naive_least_core[False-test_utility4]": 0.024587185034761205,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game0]": 0.055623405996811925,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game1]": 0.05625994600086415,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game2]": 0.05063546000201313,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game3]": 0.05257723800241365,
+    "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_game4]": 0.055973189997530426,
     "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility0]": 0.025446541025303304,
     "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility1]": 0.026494102989090607,
     "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility2]": 0.02477889700094238,
     "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility3]": 0.026450325007317588,
     "tests/value/least_core/test_naive.py::test_naive_least_core[True-test_utility4]": 0.026973432017257437,
-    "tests/value/loo/test_loo.py::test_loo[100]": 3.7793434759951197,
-    "tests/value/loo/test_loo.py::test_loo[10]": 3.8455980509752408,
-    "tests/value/shapley/test_classwise.py::test_classwise_scorer_accuracies_left_right_margins[101-0.3-0.4]": 0.004718418029369786,
-    "tests/value/shapley/test_classwise.py::test_classwise_scorer_accuracies_manual_derivation": 0.022209248010767624,
-    "tests/value/shapley/test_classwise.py::test_classwise_scorer_is_symmetric[101-0.3-0.4]": 0.0053302829910535365,
-    "tests/value/shapley/test_classwise.py::test_classwise_scorer_representation": 0.002573036035755649,
-    "tests/value/shapley/test_classwise.py::test_classwise_scorer_utility[101-0.3-0.4]": 0.00688477698713541,
-    "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution-n_resample_complement_sets=1-n_samples=500]": 6.088012945023365,
-    "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_no_default-n_resample_complement_sets=1-n_samples=500]": 6.90557194603025,
-    "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_no_default_allow_empty_set-n_resample_complement_sets=1-n_samples=500]": 6.456796451995615,
-    "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_normalized-n_resample_complement_sets=1-n_samples=500]": 5.917300594970584,
-    "tests/value/shapley/test_classwise.py::test_closed_form_linear_classifier": 0.004191815009107813,
-    "tests/value/shapley/test_knn.py::test_knn_montecarlo_match": 6.380129672033945,
+    "tests/value/loo/test_loo.py::test_loo[100]": 6.34605625200129,
+    "tests/value/loo/test_loo.py::test_loo[10]": 6.683512068999335,
+    "tests/value/shapley/test_classwise.py::test_classwise_scorer_accuracies_left_right_margins[101-0.3-0.4]": 0.014495325998723274,
+    "tests/value/shapley/test_classwise.py::test_classwise_scorer_accuracies_manual_derivation": 0.059531668999625253,
+    "tests/value/shapley/test_classwise.py::test_classwise_scorer_is_symmetric[101-0.3-0.4]": 0.017718389000947354,
+    "tests/value/shapley/test_classwise.py::test_classwise_scorer_representation": 0.00893844900019758,
+    "tests/value/shapley/test_classwise.py::test_classwise_scorer_utility[101-0.3-0.4]": 0.02120917100182851,
+    "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution-n_resample_complement_sets=1-n_samples=500]": 11.03723036699921,
+    "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_no_default-n_resample_complement_sets=1-n_samples=500]": 12.916025546999663,
+    "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_no_default_allow_empty_set-n_resample_complement_sets=1-n_samples=500]": 12.068119810999633,
+    "tests/value/shapley/test_classwise.py::test_classwise_shapley[classwise_shapley_exact_solution_normalized-n_resample_complement_sets=1-n_samples=500]": 10.891289137000058,
+    "tests/value/shapley/test_classwise.py::test_closed_form_linear_classifier": 0.01344082000105118,
+    "tests/value/shapley/test_knn.py::test_knn_montecarlo_match": 11.906123751998166,
     "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[12-owen-0.1-0.0001-kwargs2]": 0.6999966300209053,
     "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[12-owen_antithetic-0.1-0.0001-kwargs3]": 1.3923712590476498,
     "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[12-permutation_montecarlo-0.1-1e-05-kwargs0]": 4.533932764985366,
     "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[3-group_testing-0.1-0.01-kwargs4]": 2.874565462989267,
     "tests/value/shapley/test_montecarlo.py::test_analytic_montecarlo_shapley[8-combinatorial_montecarlo-0.2-0.0001-kwargs1]": 4.175152084033471,
-    "tests/value/shapley/test_montecarlo.py::test_grouped_linear_montecarlo_shapley[permutation_montecarlo-kwargs0-scorer0-0.1-2-0-21-2]": 5.129105891013751,
-    "tests/value/shapley/test_montecarlo.py::test_hoeffding_bound_montecarlo[combinatorial_montecarlo-6-0.1-0.1]": 4.910673014004715,
-    "tests/value/shapley/test_montecarlo.py::test_hoeffding_bound_montecarlo[permutation_montecarlo-6-0.1-0.1]": 52.25644952899893,
+    "tests/value/shapley/test_montecarlo.py::test_games[combinatorial_montecarlo-0.2-0.0001-kwargs1-test_game0]": 8.304236846999629,
+    "tests/value/shapley/test_montecarlo.py::test_games[combinatorial_montecarlo-0.2-0.0001-kwargs1-test_game1]": 8.651754697999422,
+    "tests/value/shapley/test_montecarlo.py::test_games[group_testing-0.1-0.01-kwargs4-test_game0]": 4.506434214001274,
+    "tests/value/shapley/test_montecarlo.py::test_games[group_testing-0.1-0.01-kwargs4-test_game1]": 5.184473866002008,
+    "tests/value/shapley/test_montecarlo.py::test_games[owen-0.2-0.0001-kwargs2-test_game0]": 0.695304662000126,
+    "tests/value/shapley/test_montecarlo.py::test_games[owen-0.2-0.0001-kwargs2-test_game1]": 0.754036617001475,
+    "tests/value/shapley/test_montecarlo.py::test_games[owen_antithetic-0.1-0.0001-kwargs3-test_game0]": 1.3446016939979017,
+    "tests/value/shapley/test_montecarlo.py::test_games[owen_antithetic-0.1-0.0001-kwargs3-test_game1]": 1.7906026460022986,
+    "tests/value/shapley/test_montecarlo.py::test_games[permutation_montecarlo-0.2-0.0001-kwargs0-test_game0]": 9.640759977000926,
+    "tests/value/shapley/test_montecarlo.py::test_games[permutation_montecarlo-0.2-0.0001-kwargs0-test_game1]": 9.149135870000464,
+    "tests/value/shapley/test_montecarlo.py::test_grouped_linear_montecarlo_shapley[permutation_montecarlo-kwargs0-scorer0-0.1-2-0-21-2]": 11.869230333000814,
+    "tests/value/shapley/test_montecarlo.py::test_hoeffding_bound_montecarlo[combinatorial_montecarlo-6-0.1-0.1]": 12.159375920000457,
+    "tests/value/shapley/test_montecarlo.py::test_hoeffding_bound_montecarlo[permutation_montecarlo-6-0.1-0.1]": 121.21386299999904,
     "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[combinatorial_montecarlo-kwargs1-scorer0-0.25-2-0-21]": 17.78464582102606,
     "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[group_testing-kwargs4-scorer0-0.25-2-0-21]": 29.239474696019897,
     "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[owen-kwargs2-scorer0-0.25-2-0-21]": 4.124498174991459,
     "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[owen_antithetic-kwargs3-scorer0-0.25-2-0-21]": 7.887545032019261,
     "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_shapley[permutation_montecarlo-kwargs0-scorer0-0.25-2-0-21]": 5.8485472809989005,
-    "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[group_testing-kwargs3-scorer0-0.2-2-0-21]": 30.232708652998554,
-    "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen-kwargs1-scorer0-0.2-2-0-21]": 13.355578221991891,
-    "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen_antithetic-kwargs2-scorer0-0.2-2-0-21]": 20.621750775026157,
-    "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[permutation_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 5.888187222008128,
+    "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[group_testing-kwargs3-scorer0-0.2-2-0-21]": 105.57146695700249,
+    "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen-kwargs1-scorer0-0.2-2-0-21]": 46.293949323999186,
+    "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[owen_antithetic-kwargs2-scorer0-0.2-2-0-21]": 75.77437868900051,
+    "tests/value/shapley/test_montecarlo.py::test_linear_montecarlo_with_outlier[permutation_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 14.84272324000085,
     "tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-combinatorial_montecarlo-kwargs0]": 0.16786966001382098,
     "tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-owen-kwargs1]": 17.011920137971174,
     "tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-12-owen_antithetic-kwargs2]": 35.88025256394758,
     "tests/value/shapley/test_montecarlo.py::test_montecarlo_shapley_housing_dataset[12-3-4-group_testing-kwargs3]": 0.25901710899779573,
+    "tests/value/shapley/test_montecarlo.py::test_seed[combinatorial_montecarlo-kwargs0-test_game0]": 0.10237690700159874,
+    "tests/value/shapley/test_montecarlo.py::test_seed[group_testing-kwargs3-test_game0]": 1.3946212869996089,
+    "tests/value/shapley/test_montecarlo.py::test_seed[owen-kwargs1-test_game0]": 2.984055114999137,
+    "tests/value/shapley/test_montecarlo.py::test_seed[owen_antithetic-kwargs2-test_game0]": 5.702334433002761,
     "tests/value/shapley/test_naive.py::test_analytic_exact_shapley[12-combinatorial_exact_shapley-0.01-1e-05]": 2.798590613005217,
     "tests/value/shapley/test_naive.py::test_analytic_exact_shapley[6-permutation_exact_shapley-0.01-1e-05]": 0.34537768000154756,
-    "tests/value/shapley/test_naive.py::test_grouped_linear[2-0-50-3-r2]": 0.057835308980429545,
-    "tests/value/shapley/test_naive.py::test_grouped_linear[2-1-100-5-explained_variance]": 1.2154581100330688,
-    "tests/value/shapley/test_naive.py::test_grouped_linear[2-1-100-5-r2]": 1.1950475970224943,
+    "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game0-0.1-1e-05]": 0.035801175001324737,
+    "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game1-0.1-1e-05]": 0.020296718997997232,
+    "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game2-0.1-1e-05]": 0.026713223998740432,
+    "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game3-0.1-1e-05]": 0.024250888998722075,
+    "tests/value/shapley/test_naive.py::test_games[combinatorial_exact_shapley-test_game4-0.1-1e-05]": 0.08578255800057377,
+    "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game0-0.1-1e-05]": 0.0334680340019986,
+    "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game1-0.1-1e-05]": 0.02248540199798299,
+    "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game2-0.1-1e-05]": 0.021266358000502805,
+    "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game3-0.1-1e-05]": 0.02347195299989835,
+    "tests/value/shapley/test_naive.py::test_games[permutation_exact_shapley-test_game4-0.1-1e-05]": 0.9876527700016595,
+    "tests/value/shapley/test_naive.py::test_grouped_linear[2-0-50-3-r2]": 0.19884431500031496,
+    "tests/value/shapley/test_naive.py::test_grouped_linear[2-1-100-5-explained_variance]": 3.9915946569999505,
+    "tests/value/shapley/test_naive.py::test_grouped_linear[2-1-100-5-r2]": 3.9885682109998015,
     "tests/value/shapley/test_naive.py::test_linear[2-0-10-r2]": 0.05533879197901115,
     "tests/value/shapley/test_naive.py::test_linear[2-1-10-explained_variance]": 0.058987755968701094,
     "tests/value/shapley/test_naive.py::test_linear[2-1-10-neg_median_absolute_error]": 0.05515471697435714,
     "tests/value/shapley/test_naive.py::test_linear[2-1-10-r2]": 0.05683578198659234,
-    "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-0-20-r2]": 7.4271527160017285,
-    "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-explained_variance]": 7.752014733996475,
-    "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-neg_median_absolute_error]": 7.2494586749817245,
-    "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-r2]": 7.528596303978702,
-    "tests/value/shapley/test_naive.py::test_polynomial[coefficients0-r2]": 0.10091358600766398,
-    "tests/value/shapley/test_naive.py::test_polynomial[coefficients1-neg_median_absolute_error]": 0.09756919997744262,
-    "tests/value/shapley/test_naive.py::test_polynomial[coefficients2-explained_variance]": 0.10092617000918835,
-    "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients0-r2]": 0.05707916300161742,
-    "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients1-neg_median_absolute_error]": 0.058802402985747904,
-    "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients2-explained_variance]": 0.06408755297889002,
+    "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-0-20-r2]": 25.743576199000017,
+    "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-explained_variance]": 26.06965675200081,
+    "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-neg_median_absolute_error]": 25.645237798999005,
+    "tests/value/shapley/test_naive.py::test_linear_with_outlier[2-1-20-r2]": 25.97635805399841,
+    "tests/value/shapley/test_naive.py::test_polynomial[coefficients0-r2]": 0.20116403300016827,
+    "tests/value/shapley/test_naive.py::test_polynomial[coefficients1-neg_median_absolute_error]": 0.20279847600068024,
+    "tests/value/shapley/test_naive.py::test_polynomial[coefficients2-explained_variance]": 0.20646126699830347,
+    "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients0-r2]": 0.15503699600049003,
+    "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients1-neg_median_absolute_error]": 0.15186486699894886,
+    "tests/value/shapley/test_naive.py::test_polynomial_with_outlier[coefficients2-explained_variance]": 0.1560443580001447,
+    "tests/value/shapley/test_truncated.py::test_games[done0-NoTruncation-truncation_kwargs0-test_game0]": 8.864981821001493,
+    "tests/value/shapley/test_truncated.py::test_games[done0-NoTruncation-truncation_kwargs0-test_game1]": 8.904717276998781,
+    "tests/value/shapley/test_truncated.py::test_games[done1-FixedTruncation-truncation_kwargs1-test_game0]": 8.893666212001335,
+    "tests/value/shapley/test_truncated.py::test_games[done1-FixedTruncation-truncation_kwargs1-test_game1]": 8.871429693997925,
     "tests/value/shapley/test_truncated.py::test_tmcs_analytic_montecarlo_shapley[12-truncated_montecarlo-0.1-1e-05-kwargs0]": 5.025441929989029,
     "tests/value/shapley/test_truncated.py::test_tmcs_linear_montecarlo_shapley[truncated_montecarlo-kwargs0-scorer0-0.25-2-0-21]": 5.633914494974306,
-    "tests/value/shapley/test_truncated.py::test_tmcs_linear_montecarlo_with_outlier[truncated_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 3.523623990971828,
-    "tests/value/test_sampler.py::test_chunkify[AntitheticSampler]": 0.0012030639918521047,
-    "tests/value/test_sampler.py::test_chunkify[DeterministicUniformSampler]": 0.0011419990041758865,
-    "tests/value/test_sampler.py::test_chunkify[RandomHierarchicalSampler]": 0.0011900250101462007,
-    "tests/value/test_sampler.py::test_chunkify[UniformSampler]": 0.0013321389851626009,
-    "tests/value/test_sampler.py::test_chunkify_permutation[DeterministicPermutationSampler]": 0.0010862670314963907,
-    "tests/value/test_sampler.py::test_chunkify_permutation[PermutationSampler]": 0.001125522016081959,
-    "tests/value/test_sampler.py::test_proper[indices0-AntitheticSampler]": 0.0011964229634031653,
-    "tests/value/test_sampler.py::test_proper[indices0-DeterministicPermutationSampler]": 0.0013584279513452202,
-    "tests/value/test_sampler.py::test_proper[indices0-DeterministicUniformSampler]": 0.0013845030043739825,
-    "tests/value/test_sampler.py::test_proper[indices0-PermutationSampler]": 0.0012692750024143606,
-    "tests/value/test_sampler.py::test_proper[indices0-RandomHierarchicalSampler]": 0.0011780599888879806,
-    "tests/value/test_sampler.py::test_proper[indices0-UniformSampler]": 0.0012423349835444242,
-    "tests/value/test_sampler.py::test_proper[indices1-AntitheticSampler]": 0.001568679028423503,
-    "tests/value/test_sampler.py::test_proper[indices1-DeterministicPermutationSampler]": 0.0013892220158595592,
-    "tests/value/test_sampler.py::test_proper[indices1-DeterministicUniformSampler]": 0.0014415960176847875,
-    "tests/value/test_sampler.py::test_proper[indices1-PermutationSampler]": 0.0012552720145322382,
-    "tests/value/test_sampler.py::test_proper[indices1-RandomHierarchicalSampler]": 0.0017029709706548601,
-    "tests/value/test_sampler.py::test_proper[indices1-UniformSampler]": 0.0015911830123513937,
-    "tests/value/test_sampler.py::test_proper_reproducible[indices0-AntitheticSampler]": 0.0014955719816498458,
-    "tests/value/test_sampler.py::test_proper_reproducible[indices0-PermutationSampler]": 0.0017780059715732932,
-    "tests/value/test_sampler.py::test_proper_reproducible[indices0-RandomHierarchicalSampler]": 0.0015286150155588984,
-    "tests/value/test_sampler.py::test_proper_reproducible[indices0-UniformSampler]": 0.0013392769906204194,
-    "tests/value/test_sampler.py::test_proper_reproducible[indices1-AntitheticSampler]": 0.005814862961415201,
-    "tests/value/test_sampler.py::test_proper_reproducible[indices1-PermutationSampler]": 0.0022604400001000613,
-    "tests/value/test_sampler.py::test_proper_reproducible[indices1-RandomHierarchicalSampler]": 0.01281771101639606,
-    "tests/value/test_sampler.py::test_proper_reproducible[indices1-UniformSampler]": 0.006939170008990914,
-    "tests/value/test_sampler.py::test_proper_stochastic[indices0-AntitheticSampler]": 0.001301849988522008,
-    "tests/value/test_sampler.py::test_proper_stochastic[indices0-PermutationSampler]": 0.0013378779985941947,
-    "tests/value/test_sampler.py::test_proper_stochastic[indices0-RandomHierarchicalSampler]": 0.0014513320056721568,
-    "tests/value/test_sampler.py::test_proper_stochastic[indices0-UniformSampler]": 0.0014353079604916275,
-    "tests/value/test_sampler.py::test_proper_stochastic[indices1-AntitheticSampler]": 0.006029498006682843,
-    "tests/value/test_sampler.py::test_proper_stochastic[indices1-PermutationSampler]": 0.0019644349522423,
-    "tests/value/test_sampler.py::test_proper_stochastic[indices1-RandomHierarchicalSampler]": 0.012361108005279675,
-    "tests/value/test_sampler.py::test_proper_stochastic[indices1-UniformSampler]": 0.006347205984639004,
-    "tests/value/test_semivalues.py::test_banzhaf[AntitheticPermutationSampler-5]": 10.714197647990659,
-    "tests/value/test_semivalues.py::test_banzhaf[AntitheticSampler-5]": 4.695468286023242,
-    "tests/value/test_semivalues.py::test_banzhaf[DeterministicPermutationSampler-5]": 6.074063064996153,
-    "tests/value/test_semivalues.py::test_banzhaf[DeterministicUniformSampler-5]": 4.212341544014635,
-    "tests/value/test_semivalues.py::test_banzhaf[PermutationSampler-5]": 8.149094285006868,
-    "tests/value/test_semivalues.py::test_banzhaf[UniformSampler-5]": 4.764893947984092,
-    "tests/value/test_semivalues.py::test_coefficients[banzhaf_coefficient-100]": 0.003842581994831562,
-    "tests/value/test_semivalues.py::test_coefficients[banzhaf_coefficient-10]": 0.0032151709601748735,
-    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w0-100]": 0.004444399964995682,
-    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w0-10]": 0.003756532969418913,
-    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w1-100]": 0.004344976012362167,
-    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w1-10]": 0.003551592002622783,
-    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w2-100]": 0.004556107014650479,
-    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w2-10]": 0.0035066070267930627,
-    "tests/value/test_semivalues.py::test_coefficients[shapley_coefficient-100]": 0.0047601540281903,
-    "tests/value/test_semivalues.py::test_coefficients[shapley_coefficient-10]": 0.0030498180130962282,
+    "tests/value/shapley/test_truncated.py::test_tmcs_linear_montecarlo_with_outlier[truncated_montecarlo-kwargs0-scorer0-0.2-2-0-21]": 7.1438663650005765,
+    "tests/value/test_sampler.py::test_chunkify[AntitheticSampler]": 0.002635386001202278,
+    "tests/value/test_sampler.py::test_chunkify[DeterministicUniformSampler]": 0.002136322002115776,
+    "tests/value/test_sampler.py::test_chunkify[RandomHierarchicalSampler]": 0.0024412720013060607,
+    "tests/value/test_sampler.py::test_chunkify[UniformSampler]": 0.0022573409987671766,
+    "tests/value/test_sampler.py::test_chunkify_permutation[DeterministicPermutationSampler]": 0.0024367070000153035,
+    "tests/value/test_sampler.py::test_chunkify_permutation[PermutationSampler]": 0.002322892001757282,
+    "tests/value/test_sampler.py::test_proper[indices0-AntitheticSampler]": 0.003334062997964793,
+    "tests/value/test_sampler.py::test_proper[indices0-DeterministicPermutationSampler]": 0.002626270001201192,
+    "tests/value/test_sampler.py::test_proper[indices0-DeterministicUniformSampler]": 0.002812078997521894,
+    "tests/value/test_sampler.py::test_proper[indices0-PermutationSampler]": 0.002539194001656142,
+    "tests/value/test_sampler.py::test_proper[indices0-RandomHierarchicalSampler]": 0.0026362519984104438,
+    "tests/value/test_sampler.py::test_proper[indices0-UniformSampler]": 0.0024412409984506667,
+    "tests/value/test_sampler.py::test_proper[indices1-AntitheticSampler]": 0.0027277339995634975,
+    "tests/value/test_sampler.py::test_proper[indices1-DeterministicPermutationSampler]": 0.002861182998458389,
+    "tests/value/test_sampler.py::test_proper[indices1-DeterministicUniformSampler]": 0.004058188998897094,
+    "tests/value/test_sampler.py::test_proper[indices1-PermutationSampler]": 0.0026329000011173775,
+    "tests/value/test_sampler.py::test_proper[indices1-RandomHierarchicalSampler]": 0.003700332001244533,
+    "tests/value/test_sampler.py::test_proper[indices1-UniformSampler]": 0.003620775998570025,
+    "tests/value/test_sampler.py::test_proper_reproducible[indices0-AntitheticSampler]": 0.0028454019975470146,
+    "tests/value/test_sampler.py::test_proper_reproducible[indices0-PermutationSampler]": 0.0024918920007621637,
+    "tests/value/test_sampler.py::test_proper_reproducible[indices0-RandomHierarchicalSampler]": 0.0020272490019124234,
+    "tests/value/test_sampler.py::test_proper_reproducible[indices0-UniformSampler]": 0.0027337100000295322,
+    "tests/value/test_sampler.py::test_proper_reproducible[indices1-AntitheticSampler]": 0.009103345002586138,
+    "tests/value/test_sampler.py::test_proper_reproducible[indices1-PermutationSampler]": 0.003312619001007988,
+    "tests/value/test_sampler.py::test_proper_reproducible[indices1-RandomHierarchicalSampler]": 0.017666732001089258,
+    "tests/value/test_sampler.py::test_proper_reproducible[indices1-UniformSampler]": 0.010906160998274572,
+    "tests/value/test_sampler.py::test_proper_stochastic[indices0-AntitheticSampler]": 0.0025616729999455856,
+    "tests/value/test_sampler.py::test_proper_stochastic[indices0-PermutationSampler]": 0.0034559460000309628,
+    "tests/value/test_sampler.py::test_proper_stochastic[indices0-RandomHierarchicalSampler]": 0.0029194710004958324,
+    "tests/value/test_sampler.py::test_proper_stochastic[indices0-UniformSampler]": 0.0028906579991598846,
+    "tests/value/test_sampler.py::test_proper_stochastic[indices1-AntitheticSampler]": 0.011208809999516234,
+    "tests/value/test_sampler.py::test_proper_stochastic[indices1-PermutationSampler]": 0.003227124001568882,
+    "tests/value/test_sampler.py::test_proper_stochastic[indices1-RandomHierarchicalSampler]": 0.020847252999374177,
+    "tests/value/test_sampler.py::test_proper_stochastic[indices1-UniformSampler]": 0.01049548499941011,
+    "tests/value/test_semivalues.py::test_banzhaf[AntitheticPermutationSampler-5]": 19.099751196999932,
+    "tests/value/test_semivalues.py::test_banzhaf[AntitheticSampler-5]": 8.640272729999197,
+    "tests/value/test_semivalues.py::test_banzhaf[DeterministicPermutationSampler-5]": 11.046467014999507,
+    "tests/value/test_semivalues.py::test_banzhaf[DeterministicUniformSampler-5]": 7.140763282997796,
+    "tests/value/test_semivalues.py::test_banzhaf[PermutationSampler-5]": 16.536335553000754,
+    "tests/value/test_semivalues.py::test_banzhaf[UniformSampler-5]": 8.56469571100206,
+    "tests/value/test_semivalues.py::test_coefficients[banzhaf_coefficient-100]": 0.01005963700117718,
+    "tests/value/test_semivalues.py::test_coefficients[banzhaf_coefficient-10]": 0.008440342002359102,
+    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w0-100]": 0.01072616300007212,
+    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w0-10]": 0.010928496998531045,
+    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w1-100]": 0.012177771001006477,
+    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w1-10]": 0.00821317400004773,
+    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w2-100]": 0.010945971000182908,
+    "tests/value/test_semivalues.py::test_coefficients[beta_coefficient_w2-10]": 0.008208530998672359,
+    "tests/value/test_semivalues.py::test_coefficients[shapley_coefficient-100]": 0.019708362000528723,
+    "tests/value/test_semivalues.py::test_coefficients[shapley_coefficient-10]": 0.007813238997187,
+    "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-AntitheticPermutationSampler-test_game0]": 22.607437191998542,
+    "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-AntitheticPermutationSampler-test_game1]": 19.905466008996882,
+    "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-AntitheticSampler-test_game0]": 22.902231953998125,
+    "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-AntitheticSampler-test_game1]": 20.254530511001576,
+    "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-PermutationSampler-test_game0]": 22.228997524001898,
+    "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-PermutationSampler-test_game1]": 19.948070817999906,
+    "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-UniformSampler-test_game0]": 23.313307015001556,
+    "tests/value/test_semivalues.py::test_games_shapley[beta_coefficient_w-UniformSampler-test_game1]": 20.214418551000563,
+    "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-AntitheticPermutationSampler-test_game0]": 16.339908187999754,
+    "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-AntitheticPermutationSampler-test_game1]": 14.935287896998489,
+    "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-AntitheticSampler-test_game0]": 16.71660759900078,
+    "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-AntitheticSampler-test_game1]": 15.118247157999576,
+    "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-PermutationSampler-test_game0]": 16.669900056000188,
+    "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-PermutationSampler-test_game1]": 14.85890512199876,
+    "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-UniformSampler-test_game0]": 16.9996823649999,
+    "tests/value/test_semivalues.py::test_games_shapley[shapley_coefficient-UniformSampler-test_game1]": 15.419395829998393,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicPermutationSampler-test_game0]": 7.571815403000073,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicPermutationSampler-test_game1]": 6.795873736999056,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicPermutationSampler-test_game2]": 6.49785933900057,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicPermutationSampler-test_game3]": 7.046587265998824,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicUniformSampler-test_game0]": 6.9995765299991035,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicUniformSampler-test_game1]": 7.470778629000051,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicUniformSampler-test_game2]": 6.813381661997482,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[beta_coefficient_w-DeterministicUniformSampler-test_game3]": 7.335269874001824,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicPermutationSampler-test_game0]": 8.675189851999676,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicPermutationSampler-test_game1]": 6.932035337997149,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicPermutationSampler-test_game2]": 6.9341853499990975,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicPermutationSampler-test_game3]": 6.737996050998845,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicUniformSampler-test_game0]": 4.491834778002158,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicUniformSampler-test_game1]": 6.446436399000959,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicUniformSampler-test_game2]": 6.968900550000399,
+    "tests/value/test_semivalues.py::test_games_shapley_deterministic[shapley_coefficient-DeterministicUniformSampler-test_game3]": 6.659720210998785,
+    "tests/value/test_semivalues.py::test_marginal_batch_size[PermutationSampler-beta_coefficient_w-5-test_game0]": 0.004239154999595485,
     "tests/value/test_semivalues.py::test_shapley[beta_coefficient_w-AntitheticPermutationSampler-5]": 5.1298250389809255,
     "tests/value/test_semivalues.py::test_shapley[beta_coefficient_w-AntitheticSampler-5]": 21.97495059997891,
     "tests/value/test_semivalues.py::test_shapley[beta_coefficient_w-DeterministicPermutationSampler-5]": 5.294114143965999,
@@ -460,18 +835,20 @@
     "tests/value/test_semivalues.py::test_shapley[shapley_coefficient-DeterministicUniformSampler-5]": 3.263753114035353,
     "tests/value/test_semivalues.py::test_shapley[shapley_coefficient-PermutationSampler-5]": 4.766259174008155,
     "tests/value/test_semivalues.py::test_shapley[shapley_coefficient-UniformSampler-5]": 8.919797526003094,
+    "tests/value/test_semivalues.py::test_shapley_batch_size[1-PermutationSampler-beta_coefficient_w-5-test_game0]": 9.699354351001602,
+    "tests/value/test_semivalues.py::test_shapley_batch_size[2-PermutationSampler-beta_coefficient_w-5-test_game0]": 11.229309665000983,
     "tests/value/test_semivalues.py::test_shapley_batch_size[5-PermutationSampler-beta_coefficient_w-5]": 9.19877936199191,
-    "tests/value/test_stopping.py::test_history_deviation[0.01-100]": 0.7586702810076531,
-    "tests/value/test_stopping.py::test_history_deviation[0.01-1]": 0.01646678801625967,
-    "tests/value/test_stopping.py::test_history_deviation[0.01-42]": 0.35505866500898264,
-    "tests/value/test_stopping.py::test_history_deviation[0.05-100]": 0.15892104100203142,
-    "tests/value/test_stopping.py::test_history_deviation[0.05-1]": 0.003904131968738511,
-    "tests/value/test_stopping.py::test_history_deviation[0.05-42]": 0.06365110300248489,
-    "tests/value/test_stopping.py::test_make_criterion": 0.0067943750182166696,
-    "tests/value/test_stopping.py::test_max_checks": 0.0022287879837676883,
-    "tests/value/test_stopping.py::test_max_time": 0.30431480798870325,
-    "tests/value/test_stopping.py::test_minmax_updates": 0.003805230953730643,
-    "tests/value/test_stopping.py::test_standard_error": 0.003371614031493664,
-    "tests/value/test_stopping.py::test_stopping_criterion": 0.004461375967366621,
-    "tests/value/test_stopping.py::test_stopping_criterion_composition": 0.007468684023479
+    "tests/value/test_stopping.py::test_history_deviation[0.01-100]": 1.7738857549993554,
+    "tests/value/test_stopping.py::test_history_deviation[0.01-1]": 0.029810868998538353,
+    "tests/value/test_stopping.py::test_history_deviation[0.01-42]": 0.7947784120024153,
+    "tests/value/test_stopping.py::test_history_deviation[0.05-100]": 0.3636526160007634,
+    "tests/value/test_stopping.py::test_history_deviation[0.05-1]": 0.010319109000192839,
+    "tests/value/test_stopping.py::test_history_deviation[0.05-42]": 0.16107529900000372,
+    "tests/value/test_stopping.py::test_make_criterion": 0.016543962998184725,
+    "tests/value/test_stopping.py::test_max_checks": 0.006280684001467307,
+    "tests/value/test_stopping.py::test_max_time": 0.30847623600129737,
+    "tests/value/test_stopping.py::test_minmax_updates": 0.012927236997711589,
+    "tests/value/test_stopping.py::test_standard_error": 0.007960140001159743,
+    "tests/value/test_stopping.py::test_stopping_criterion": 0.011265246001130436,
+    "tests/value/test_stopping.py::test_stopping_criterion_composition": 0.019021763000637293
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index afe61ac67..1b9483834 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,16 +2,23 @@
 
 ## Unreleased
 
+### Added
+
+- Implement new method: `EkfacInfluence`
+  [PR #451](https://github.com/aai-institute/pyDVL/issues/451)
+- New notebook to showcase ekfac for LLMs
+  [PR #483](https://github.com/aai-institute/pyDVL/pull/483)
+- Implemented exact games in Castro et al. 2009 and 2017
+  [PR #341](https://github.com/appliedAI-Initiative/pyDVL/pull/341)
+
 ### Fixed
 
 - Bug in using `DaskInfluenceCalcualator` with `TorchnumpyConverter`
   for single dimensional arrays [PR #485](https://github.com/aai-institute/pyDVL/pull/485)
 - Fix implementations of `to` methods of `TorchInfluenceFunctionModel` implementations
   [PR #487](https://github.com/aai-institute/pyDVL/pull/487)
-- Implement new method: `EkfacInfluence`
-  [PR #476](https://github.com/aai-institute/pyDVL/pull/476)
-- New notebook to showcase ekfac for LLMs
-  [PR #483](https://github.com/aai-institute/pyDVL/pull/483)
+- Fixed bug with checking for converged values in semivalues
+  [PR #341](https://github.com/appliedAI-Initiative/pyDVL/pull/341)
 
 ## 0.8.0 - 🆕 New interfaces, scaling computation, bug fixes and improvements 🎁
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 198c7ded3..3d1bd0dc9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -343,8 +343,12 @@ runs](#skipping-ci-runs)).
 3. We split the tests based on their duration into groups and run them in parallel.
   
    For that we use [pytest-split](https://jerry-git.github.io/pytest-split)
-   to first store the duration of all tests with `pytest --store-durations pytest --slow-tests`
+   to first store the duration of all tests with
+   `tox -e tests -- --store-durations --slow-tests`
    in a `.test_durations` file.
+   
+   Alternatively, we can use pytest directly
+   `pytest --store-durations --slow-tests`.
 
    > **Note** This does not have to be done each time a new test or test case
    > is added. For new tests and test cases pytes-split assumes
@@ -359,11 +363,14 @@ runs](#skipping-ci-runs)).
    Then we can have as many splits as we want:
 
    ```shell
-   pytest --splits 3 --group 1
-   pytest --splits 3 --group 2
-   pytest --splits 3 --group 3
+   tox -e tests -- --splits 3 --group 1
+   tox -e tests -- --splits 3 --group 2
+   tox -e tests -- --splits 3 --group 3
    ```
    
+   Alternatively, we can use pytest directly
+   `pytest --splits 3 --group 1`.
+   
    Each one of these commands should be run in a separate shell/job
    to run the test groups in parallel and decrease the total runtime.
 
diff --git a/src/pydvl/utils/types.py b/src/pydvl/utils/types.py
index 1a915c33c..18a22bd26 100644
--- a/src/pydvl/utils/types.py
+++ b/src/pydvl/utils/types.py
@@ -23,7 +23,7 @@
 ]
 
 IndexT = TypeVar("IndexT", bound=np.int_)
-NameT = TypeVar("NameT", bound=np.object_)
+NameT = TypeVar("NameT", np.object_, np.int_)
 R = TypeVar("R", covariant=True)
 Seed = Union[int, Generator]
 
diff --git a/src/pydvl/utils/utility.py b/src/pydvl/utils/utility.py
index b975c0ff2..1afbfdeb3 100644
--- a/src/pydvl/utils/utility.py
+++ b/src/pydvl/utils/utility.py
@@ -38,7 +38,7 @@
 from pydvl.utils.score import Scorer
 from pydvl.utils.types import SupervisedModel
 
-__all__ = ["Utility", "DataUtilityLearning", "MinerGameUtility", "GlovesGameUtility"]
+__all__ = ["Utility", "DataUtilityLearning"]
 
 logger = logging.getLogger(__name__)
 
@@ -356,120 +356,3 @@ def __call__(self, indices: Iterable[int]) -> float:
     def data(self) -> Dataset:
         """Returns the wrapped utility's [Dataset][pydvl.utils.dataset.Dataset]."""
         return self.utility.data
-
-
-class MinerGameUtility(Utility):
-    r"""Toy game utility that is used for testing and demonstration purposes.
-
-    Consider a group of n miners, who have discovered large bars of gold.
-
-    If two miners can carry one piece of gold, then the payoff of a
-    coalition $S$ is:
-
-    $${
-    v(S) = \left\{\begin{array}{lll}
-    \mid S \mid / 2 & \text{, if} & \mid S \mid \text{ is even} \\
-    ( \mid S \mid - 1)/2 & \text{, if} & \mid S \mid \text{ is odd}
-    \end{array}\right.
-    }$$
-
-    If there are more than two miners and there is an even number of miners,
-    then the core consists of the single payoff where each miner gets 1/2.
-
-    If there is an odd number of miners, then the core is empty.
-
-    Taken from [Wikipedia](https://en.wikipedia.org/wiki/Core_(game_theory))
-
-    Args:
-        n_miners: Number of miners that participate in the game.
-    """
-
-    def __init__(self, n_miners: int, **kwargs):
-        if n_miners <= 2:
-            raise ValueError(f"n_miners, {n_miners} should be > 2")
-        self.n_miners = n_miners
-
-        x = np.arange(n_miners)[..., np.newaxis]
-        # The y values don't matter here
-        y = np.zeros_like(x)
-
-        self.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)
-
-    def __call__(self, indices: Iterable[int]) -> float:
-        n = len(tuple(indices))
-        if n % 2 == 0:
-            return n / 2
-        else:
-            return (n - 1) / 2
-
-    def _initialize_utility_wrapper(self):
-        pass
-
-    def exact_least_core_values(self) -> Tuple[NDArray[np.float_], float]:
-        if self.n_miners % 2 == 0:
-            values = np.array([0.5] * self.n_miners)
-            subsidy = 0.0
-        else:
-            values = np.array(
-                [(self.n_miners - 1) / (2 * self.n_miners)] * self.n_miners
-            )
-            subsidy = (self.n_miners - 1) / (2 * self.n_miners)
-        return values, subsidy
-
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}(n={self.n_miners})"
-
-
-class GlovesGameUtility(Utility):
-    r"""Toy game utility that is used for testing and demonstration purposes.
-
-    In this game, some players have a left glove and others a right glove.
-    Single gloves have a worth of zero while pairs have a worth of 1.
-
-    The payoff of a coalition $S$ is:
-
-    $${
-    v(S) = \min( \mid S \cap L \mid, \mid S \cap R \mid )
-    }$$
-
-    Where $L$, respectively $R$, is the set of players with left gloves,
-    respectively right gloves.
-
-    Args:
-        left: Number of players with a left glove.
-        right: Number of player with a right glove.
-
-    """
-
-    def __init__(self, left: int, right: int, **kwargs):
-        self.left = left
-        self.right = right
-
-        x = np.empty(left + right)[..., np.newaxis]
-        # The y values don't matter here
-        y = np.zeros_like(x)
-
-        self.data = Dataset(x_train=x, y_train=y, x_test=x, y_test=y)
-
-    def __call__(self, indices: Iterable[int]) -> float:
-        left_sum = float(np.sum(np.asarray(indices) < self.left))
-        right_sum = float(np.sum(np.asarray(indices) >= self.left))
-        return min(left_sum, right_sum)
-
-    def _initialize_utility_wrapper(self):
-        pass
-
-    def exact_least_core_values(self) -> Tuple[NDArray[np.float_], float]:
-        if self.left == self.right:
-            subsidy = -0.5
-            values = np.array([0.5] * (self.left + self.right))
-        elif self.left < self.right:
-            subsidy = 0.0
-            values = np.array([1.0] * self.left + [0.0] * self.right)
-        else:
-            subsidy = 0.0
-            values = np.array([0.0] * self.left + [1.0] * self.right)
-        return values, subsidy
-
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}(L={self.left}, R={self.right})"
diff --git a/src/pydvl/value/games.py b/src/pydvl/value/games.py
new file mode 100644
index 000000000..ef942ebcf
--- /dev/null
+++ b/src/pydvl/value/games.py
@@ -0,0 +1,637 @@
+"""
+This module provides several predefined games and, depending on the game,
+the corresponding Shapley values, Least Core values or both of them, for
+benchmarking purposes.
+
+## References
+
+[^1]: <a name="castro_polynomial_2009"></a>Castro, J., Gómez, D. and Tejada, J., 2009.
+    [Polynomial calculation of the Shapley value based on sampling](http://www.sciencedirect.com/science/article/pii/S0305054808000804).
+    Computers & Operations Research, 36(5), pp.1726-1730.
+
+"""
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from functools import lru_cache
+from typing import Iterable, Optional, Tuple
+
+import numpy as np
+import scipy as sp
+from numpy.typing import NDArray
+
+from pydvl.utils import Scorer, Status
+from pydvl.utils.dataset import Dataset
+from pydvl.utils.types import SupervisedModel
+from pydvl.utils.utility import Utility
+from pydvl.value import ValuationResult
+
+__all__ = [
+    "Game",
+    "SymmetricVotingGame",
+    "AsymmetricVotingGame",
+    "ShoesGame",
+    "AirportGame",
+    "MinimumSpanningTreeGame",
+    "MinerGame",
+]
+
+
+class DummyGameDataset(Dataset):
+    """Dummy game dataset.
+
+    Initializes a dummy game dataset from the player indices
+    `0, ..., n_players - 1` and zero-filled placeholder arrays.
+
+    This class is used internally inside the [Game][pydvl.value.games.Game]
+    class.
+
+    Args:
+        n_players: Number of players that participate in the game.
+        description: Optional description of the dataset.
+    """
+
+    def __init__(self, n_players: int, description: Optional[str] = None) -> None:
+        x = np.arange(0, n_players, 1).reshape(-1, 1)  # one row per player index
+        nil = np.zeros_like(x)  # placeholder with the same shape and dtype as x
+        super().__init__(
+            x,
+            nil.copy(),
+            nil.copy(),
+            nil.copy(),
+            feature_names=["x"],
+            target_names=["y"],
+            description=description,
+        )
+
+    def get_test_data(
+        self, indices: Optional[Iterable[int]] = None
+    ) -> Tuple[NDArray, NDArray]:
+        """Returns the subsets of the train set instead of the test set.
+
+        Args:
+            indices: Indices into the training data, or `None` to select all of it.
+
+        Returns:
+            Tuple `(x, y)` with the selected rows of `x_train` and `y_train`.
+        """
+        if indices is None:
+            return self.x_train, self.y_train
+        x = self.x_train[indices]
+        y = self.y_train[indices]
+        return x, y
+
+
+class DummyModel(SupervisedModel):
+    """Dummy model class.
+
+    A dummy supervised model used for testing purposes only; no method does any work.
+    """
+
+    def __init__(self) -> None:
+        pass
+
+    def fit(self, x: NDArray, y: NDArray) -> None:
+        pass  # nothing to fit
+
+    def predict(self, x: NDArray) -> NDArray:  # type: ignore
+        pass  # implicitly returns None, hence the type: ignore above
+
+    def score(self, x: NDArray, y: NDArray) -> float:
+        # Dummy, will be overridden: Game builds its Scorer from Game._score instead
+        return 0
+
+
+class Game(ABC):
+    """Base class for games.
+
+    Any Game subclass has to implement the abstract `_score` method
+    to assign a score to each coalition/subset and at least
+    one of `shapley_values`, `least_core_values`.
+
+    Args:
+        n_players: Number of players that participate in the game.
+        score_range: Minimum and maximum values of the `_score` method.
+        description: Optional string description of the dummy dataset that will be created.
+
+    Attributes:
+        n_players: Number of players that participate in the game.
+        data: Dummy dataset object.
+        u: Utility object with a dummy model and dataset, scored through `_score`.
+    """
+
+    def __init__(
+        self,
+        n_players: int,
+        score_range: Tuple[float, float] = (-np.inf, np.inf),
+        description: Optional[str] = None,
+    ):
+        self.n_players = n_players
+        self.data = DummyGameDataset(self.n_players, description)
+        self.u = Utility(
+            DummyModel(),
+            self.data,
+            scorer=Scorer(self._score, range=score_range),  # subclass hook
+            catch_errors=False,
+            show_warnings=True,
+        )
+
+    def shapley_values(self) -> ValuationResult:  # optional: override if known
+        raise NotImplementedError(
+            f"shapley_values method was not implemented for class {self.__class__.__name__}"
+        )
+
+    def least_core_values(self) -> ValuationResult:  # optional: override if known
+        raise NotImplementedError(
+            f"least_core_values method was not implemented for class {self.__class__.__name__}"
+        )
+
+    @abstractmethod
+    def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
+        ...  # must be provided by every concrete game
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}(n_players={self.n_players})"
+
+
+class SymmetricVotingGame(Game):
+    r"""Toy game that is used for testing and demonstration purposes.
+
+    A symmetric voting game defined in
+    (Castro et al., 2009)<sup><a href="#castro_polynomial_2009">1</a></sup>
+    Section 4.1
+
+    For this game the utility of a coalition is 1 if its cardinality is
+    greater than n_players/2, or 0 otherwise.
+
+    $${
+    v(S) = \left\{\begin{array}{ll}
+    1, & \text{ if} \quad \mid S \mid > \frac{N}{2} \\
+    0, & \text{ otherwise}
+    \end{array}\right.
+    }$$
+
+    Args:
+        n_players: Number of players that participate in the game. Must be even.
+    """
+
+    def __init__(self, n_players: int) -> None:
+        if n_players % 2 != 0:  # odd player counts are rejected
+            raise ValueError("n_players must be an even number.")
+        description = "Dummy data for the symmetric voting game in Castro et al. 2009"
+        super().__init__(
+            n_players,
+            score_range=(0, 1),
+            description=description,
+        )
+
+    def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
+        return 1 if len(X) > len(self.data) // 2 else 0  # model and y are unused
+
+    @lru_cache  # NOTE(review): lru_cache on a method keys on self and keeps it alive
+    def shapley_values(self) -> ValuationResult:
+        exact_values = np.ones(self.n_players) / self.n_players  # equal share each
+        result: ValuationResult[np.int_, np.int_] = ValuationResult(
+            algorithm="exact_shapley",
+            status=Status.Converged,
+            indices=self.data.indices,
+            values=exact_values,
+            variances=np.zeros_like(self.data.x_train),
+            counts=np.zeros_like(self.data.x_train),
+        )
+        return result
+
+
+class AsymmetricVotingGame(Game):
+    r"""Toy game that is used for testing and demonstration purposes.
+
+    An asymmetric voting game defined in
+    (Castro et al., 2009)<sup><a href="#castro_polynomial_2009">1</a></sup>
+    Section 4.2.
+
+    For this game the player set is $N = \{1,\dots,51\}$ and
+    the utility of a coalition is given by:
+
+    $${
+    v(S) = \left\{\begin{array}{ll}
+    1, & \text{ if} \quad \sum\limits_{i \in S} w_i > \sum\limits_{j \in N}\frac{w_j}{2} \\
+    0, & \text{ otherwise}
+    \end{array}\right.
+    }$$
+
+    where $w = [w_1,\dots, w_{51}]$ is a list of weights associated with each player.
+
+    Args:
+        n_players: Number of players that participate in the game. Only 51 is supported.
+    """
+
+    def __init__(self, n_players: int = 51) -> None:
+        if n_players != 51:  # the weight and value tables below are fixed-size
+            raise ValueError(
+                f"{self.__class__.__name__} only supports n_players=51 but got {n_players=}."
+            )
+        description = "Dummy data for the asymmetric voting game in Castro et al. 2009"
+        super().__init__(
+            n_players,
+            score_range=(0, 1),
+            description=description,
+        )
+
+        ranges = [  # consecutive player indices sharing one weight and one value
+            range(0, 1),
+            range(1, 2),
+            range(2, 3),
+            range(3, 5),
+            range(5, 6),
+            range(6, 7),
+            range(7, 9),
+            range(9, 10),
+            range(10, 12),
+            range(12, 15),
+            range(15, 16),
+            range(16, 20),
+            range(20, 24),
+            range(24, 26),
+            range(26, 30),
+            range(30, 34),
+            range(34, 35),
+            range(35, 44),
+            range(44, 51),
+        ]
+
+        ranges_weights = [  # voting weight of every player in the matching range
+            45,
+            41,
+            27,
+            26,
+            25,
+            21,
+            17,
+            14,
+            13,
+            12,
+            11,
+            10,
+            9,
+            8,
+            7,
+            6,
+            5,
+            4,
+            3,
+        ]
+        ranges_values = [  # value of every player in the matching range, as text
+            "0.08831",
+            "0.07973",
+            "0.05096",
+            "0.04898",
+            "0.047",
+            "0.03917",
+            "0.03147",
+            "0.02577",
+            "0.02388",
+            "0.022",
+            "0.02013",
+            "0.01827",
+            "0.01641",
+            "0.01456",
+            "0.01272",
+            "0.01088",
+            "0.009053",
+            "0.00723",
+            "0.005412",
+        ]
+
+        self.weight_table = np.zeros(self.n_players)
+        exact_values = np.zeros(self.n_players)
+        for r, w, v in zip(ranges, ranges_weights, ranges_values):
+            self.weight_table[r] = w
+            exact_values[r] = v  # presumably parsed from str to float by NumPy
+
+        self.exact_values = exact_values
+        self.threshold = np.sum(self.weight_table) / 2  # half of the total weight
+
+    def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
+        return 1 if np.sum(self.weight_table[X]) > self.threshold else 0
+
+    @lru_cache  # NOTE(review): lru_cache on a method keys on self and keeps it alive
+    def shapley_values(self) -> ValuationResult:
+        result: ValuationResult[np.int_, np.int_] = ValuationResult(
+            algorithm="exact_shapley",
+            status=Status.Converged,
+            indices=self.data.indices,
+            values=self.exact_values,  # table filled in __init__
+            variances=np.zeros_like(self.data.x_train),
+            counts=np.zeros_like(self.data.x_train),
+        )
+        return result
+
+
+class ShoesGame(Game):
+    r"""Toy game that is used for testing and demonstration purposes.
+
+    A shoes game defined in
+    (Castro et al., 2009)<sup><a href="#castro_polynomial_2009">1</a></sup>.
+
+    In this game, some players have a left shoe and others a right shoe.
+    Single shoes have a worth of zero while pairs have a worth of 1.
+
+    The payoff of a coalition $S$ is:
+
+    $${
+    v(S) = \min( \mid S \cap L \mid, \mid S \cap R \mid )
+    }$$
+
+    Where $L$, respectively $R$, is the set of players with left shoes,
+    respectively right shoes.
+
+    Args:
+        left: Number of players with a left shoe.
+        right: Number of players with a right shoe.
+    """
+
+    def __init__(self, left: int, right: int) -> None:
+        self.left = left
+        self.right = right
+        n_players = self.left + self.right
+        description = "Dummy data for the shoe game in Castro et al. 2009"
+        max_score = n_players // 2  # upper bound on the number of pairs
+        super().__init__(n_players, score_range=(0, max_score), description=description)
+
+    def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
+        left_sum = float(np.sum(np.asarray(X) < self.left))  # indices below `left`
+        right_sum = float(np.sum(np.asarray(X) >= self.left))  # all other indices
+        return min(left_sum, right_sum)
+
+    @lru_cache  # NOTE(review): lru_cache on a method keys on self and keeps it alive
+    def shapley_values(self) -> ValuationResult:
+        if self.left != self.right and (self.left > 4 or self.right > 4):
+            raise ValueError(
+                "This class only supports getting exact shapley values "
+                "for left <= 4 and right <= 4 or left == right"
+            )
+        precomputed_values = np.array(  # [own side size, other side size] -> value
+            [
+                [0.0, 0.0, 0.0, 0.0, 0.0],
+                [0.0, 0.5, 0.667, 0.75, 0.8],
+                [0.0, 0.167, 0.5, 0.65, 0.733],
+                [0.0, 0.083, 0.233, 0.5, 0.638],
+                [0.0, 0.050, 0.133, 0.271, 0.5],
+            ]
+        )
+        if self.left == self.right:
+            # Balanced case: the total value min(left, right) is split evenly.
+            n_players = self.left + self.right
+            value_left = value_right = min(self.left, self.right) / n_players
+        else:
+            value_left = precomputed_values[self.left, self.right]
+            value_right = precomputed_values[self.right, self.left]
+        exact_values = np.array([value_left] * self.left + [value_right] * self.right)
+        result: ValuationResult[np.int_, np.int_] = ValuationResult(
+            algorithm="exact_shapley",
+            status=Status.Converged,
+            indices=self.data.indices,
+            values=exact_values,
+            variances=np.zeros_like(self.data.x_train),
+            counts=np.zeros_like(self.data.x_train),
+        )
+        return result
+
+    @lru_cache  # NOTE(review): lru_cache on a method keys on self and keeps it alive
+    def least_core_values(self) -> ValuationResult:
+        if self.left == self.right:
+            subsidy = -0.5
+            exact_values = np.array([0.5] * (self.left + self.right))
+        elif self.left < self.right:  # left shoes are scarce and take all the value
+            subsidy = 0.0
+            exact_values = np.array([1.0] * self.left + [0.0] * self.right)
+        else:  # right shoes are scarce and take all the value
+            subsidy = 0.0
+            exact_values = np.array([0.0] * self.left + [1.0] * self.right)
+
+        result: ValuationResult[np.int_, np.int_] = ValuationResult(
+            algorithm="exact_least_core",
+            status=Status.Converged,
+            indices=self.data.indices,
+            values=exact_values,
+            subsidy=subsidy,
+            variances=np.zeros_like(self.data.x_train),
+            counts=np.zeros_like(self.data.x_train),
+        )
+        return result
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}(L={self.left}, R={self.right})"
+
+
+class AirportGame(Game):
+    """Toy game that is used for testing and demonstration purposes.
+
+    An airport game defined in
+    (Castro et al., 2009)<sup><a href="#castro_polynomial_2009">1</a></sup>,
+    Section 4.3. A coalition is scored by the largest cost among its members.
+
+    Args:
+        n_players: Number of players that participate in the game. Only 100 is supported.
+    """
+
+    def __init__(self, n_players: int = 100) -> None:
+        if n_players != 100:  # the tables below are fixed-size
+            raise ValueError(
+                f"{self.__class__.__name__} only supports n_players=100 but got {n_players=}."
+            )
+        description = "A dummy dataset for the airport game in Castro et al. 2009"
+        super().__init__(n_players, score_range=(0, 100), description=description)
+        ranges = [  # consecutive player indices sharing one cost and one value
+            range(0, 8),
+            range(8, 20),
+            range(20, 26),
+            range(26, 40),
+            range(40, 48),
+            range(48, 57),
+            range(57, 70),
+            range(70, 80),
+            range(80, 90),
+            range(90, 100),
+        ]
+        exact = [  # exact value of every player in the matching range
+            0.01,
+            0.020869565,
+            0.033369565,
+            0.046883079,
+            0.063549745,
+            0.082780515,
+            0.106036329,
+            0.139369662,
+            0.189369662,
+            0.289369662,
+        ]
+        costs = list(range(1, 11))  # one cost (1..10) per range, not a vector per range
+        score_table = np.zeros(100)
+        exact_values = np.zeros(100)
+
+        for r, c, v in zip(ranges, costs, exact):
+            score_table[r] = c  # scalar, broadcast over the whole index range
+            exact_values[r] = v
+
+        self.exact_values = exact_values
+        self.score_table = score_table
+
+    def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
+        return max(self.score_table[X]) or 0.0  # largest cost among the players in X
+
+    @lru_cache  # NOTE(review): lru_cache on a method keys on self and keeps it alive
+    def shapley_values(self) -> ValuationResult:
+        result: ValuationResult[np.int_, np.int_] = ValuationResult(
+            algorithm="exact_shapley",
+            status=Status.Converged,
+            indices=self.data.indices,
+            values=self.exact_values,  # table filled in __init__
+            variances=np.zeros_like(self.data.x_train),
+            counts=np.zeros_like(self.data.x_train),
+        )
+        return result
+
+
+class MinimumSpanningTreeGame(Game):
+    r"""Toy game that is used for testing and demonstration purposes.
+
+    A minimum spanning tree game defined in
+    (Castro et al., 2009)<sup><a href="#castro_polynomial_2009">1</a></sup>.
+
+    Let $G = (N \cup \{0\},E)$ be a valued graph where $N = \{1,\dots,100\}$,
+    and the cost associated to an edge $(i, j)$ is:
+
+    $${
+    c_{ij} = \left\{\begin{array}{lll}
+    1, & \text{ if} & i = j + 1 \text{ or } i = j - 1 \\
+    & & \text{ or } (i = 1 \text{ and } j = 100) \text{ or } (i = 100 \text{ and } j = 1) \\
+    101, & \text{ if} & i = 0 \text{ or } j = 0 \\
+    \infty, & \text{ otherwise}
+    \end{array}\right.
+    }$$
+
+    A minimum spanning tree game $(N, c)$ is a cost game, where for a given coalition
+    $S \subset N$, $v(S)$ is the sum of the edge cost of the minimum spanning tree,
+    i.e. $v(S)$ = Minimum Spanning Tree of the graph $G|_{S\cup\{0\}}$,
+    which is the partial graph restricted to the players $S$ and the source node $0$.
+
+    Args:
+        n_players: Number of players that participate in the game. Only 100 is supported.
+    """
+
+    def __init__(self, n_players: int = 100) -> None:
+        if n_players != 100:
+            raise ValueError(
+                f"{self.__class__.__name__} only supports n_players=100 but got {n_players=}."
+            )
+        description = (
+            "A dummy dataset for the minimum spanning tree game in Castro et al. 2009"
+        )
+        super().__init__(n_players, score_range=(0, np.inf), description=description)
+
+        graph = np.zeros(shape=(self.n_players, self.n_players))  # dense cost matrix
+
+        for i in range(self.n_players):
+            for j in range(self.n_players):
+                if (
+                    i == j + 1
+                    or i == j - 1
+                    or (i == 1 and j == self.n_players - 1)
+                    or (i == self.n_players - 1 and j == 1)
+                ):
+                    graph[i, j] = 1  # neighbours, plus the edge closing the ring
+                elif i == 0 or j == 0:
+                    graph[i, j] = 0  # NOTE(review): docstring says 101 here - confirm
+                else:
+                    graph[i, j] = np.inf
+        assert np.all(graph == graph.T)  # the cost matrix must be symmetric
+
+        self.graph = graph
+
+    def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
+        partial_graph = sp.sparse.csr_array(self.graph[np.ix_(X, X)])  # assumes 1-D X
+        span_tree = sp.sparse.csgraph.minimum_spanning_tree(partial_graph)
+        return span_tree.sum() or 0  # total edge cost of the spanning tree
+
+    @lru_cache  # NOTE(review): lru_cache on a method keys on self and keeps it alive
+    def shapley_values(self) -> ValuationResult:
+        exact_values = 2 * np.ones_like(self.data.x_train)  # same value, 2, per player
+        result: ValuationResult[np.int_, np.int_] = ValuationResult(
+            algorithm="exact_shapley",
+            status=Status.Converged,
+            indices=self.data.indices,
+            values=exact_values,
+            variances=np.zeros_like(self.data.x_train),
+            counts=np.zeros_like(self.data.x_train),
+        )
+        return result
+
+
+class MinerGame(Game):
+    r"""Toy game that is used for testing and demonstration purposes.
+
+    Consider a group of n miners, who have discovered large bars of gold.
+
+    If two miners can carry one piece of gold, then the payoff of a
+    coalition $S$ is:
+
+    $${
+    v(S) = \left\{\begin{array}{lll}
+    \mid S \mid / 2, & \text{ if} & \mid S \mid \text{ is even} \\
+    ( \mid S \mid - 1)/2, & \text{ otherwise}
+    \end{array}\right.
+    }$$
+
+    If there are more than two miners and there is an even number of miners,
+    then the core consists of the single payoff where each miner gets 1/2.
+
+    If there is an odd number of miners, then the core is empty.
+
+    Taken from [Wikipedia](https://en.wikipedia.org/wiki/Core_(game_theory))
+
+    Args:
+        n_players: Number of miners that participate in the game. Must be > 2.
+    """
+
+    def __init__(self, n_players: int) -> None:
+        if n_players <= 2:
+            raise ValueError(f"n_players, {n_players}, should be > 2")
+        description = "Dummy data for Miner Game taken from https://en.wikipedia.org/wiki/Core_(game_theory)"
+        super().__init__(
+            n_players,
+            score_range=(0, n_players // 2),  # at most one bar per pair of miners
+            description=description,
+        )
+
+    def _score(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
+        n = len(X)  # coalition size; model and y are unused
+        if n % 2 == 0:
+            return n / 2
+        else:
+            return (n - 1) / 2  # the unpaired miner carries nothing
+
+    @lru_cache()  # NOTE(review): lru_cache on a method keys on self and keeps it alive
+    def least_core_values(self) -> ValuationResult:
+        if self.n_players % 2 == 0:
+            values = np.array([0.5] * self.n_players)
+            subsidy = 0.0
+        else:  # odd number of miners: equal shares plus a positive subsidy
+            values = np.array(
+                [(self.n_players - 1) / (2 * self.n_players)] * self.n_players
+            )
+            subsidy = (self.n_players - 1) / (2 * self.n_players)
+
+        result: ValuationResult[np.int_, np.int_] = ValuationResult(
+            algorithm="exact_least_core",
+            status=Status.Converged,
+            indices=self.data.indices,
+            values=values,
+            subsidy=subsidy,
+            variances=np.zeros_like(self.data.x_train),
+            counts=np.zeros_like(self.data.x_train),
+        )
+        return result
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}(n={self.n_players})"
diff --git a/src/pydvl/value/semivalues.py b/src/pydvl/value/semivalues.py
index 9eee1c83d..2119e38a9 100644
--- a/src/pydvl/value/semivalues.py
+++ b/src/pydvl/value/semivalues.py
@@ -94,6 +94,7 @@
 from itertools import islice
 from typing import Iterable, List, Optional, Protocol, Tuple, Type, cast
 
+import numpy as np
 import scipy as sp
 from deprecate import deprecated
 from tqdm import tqdm
@@ -271,15 +272,10 @@ def compute_generic_semivalues(
 
                     # Filter out samples for indices that have already converged
                     filtered_samples = samples
-                    if skip_converged and len(done.converged) > 0:
-                        # TODO: cloudpickle can't pickle this on python 3.8:
-                        # filtered_samples = filter(
-                        #     lambda t: not done.converged[t[0]], samples
-                        # )
+                    if skip_converged and np.count_nonzero(done.converged) > 0:
+                        # TODO: cloudpickle can't pickle result of `filter` on python 3.8
                         filtered_samples = tuple(
-                            (idx, sample)
-                            for idx, sample in samples
-                            if not done.converged[idx]
+                            filter(lambda t: not done.converged[t[0]], samples)
                         )
 
                     if filtered_samples:
diff --git a/tests/test_results.py b/tests/test_results.py
index 4ea80cf72..01870ec94 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -4,6 +4,7 @@
 import operator
 import pickle
 from copy import deepcopy
+from itertools import permutations
 
 import cloudpickle
 import numpy as np
@@ -159,6 +160,20 @@ def test_updating():
     assert v.counts[1] == 2
 
 
+def test_updating_order_invariance():
+    updates = [0.8, 0.9, 1.0, 1.1, 1.2]
+    values = []
+    for permutation in permutations(updates):  # all 5! orderings of the updates
+        v = ValuationResult.zeros(indices=np.array([0]))
+        for update in permutation:
+            v.update(0, update)
+        values.append(v)
+    # Every ordering must give (almost) the same values as the first one.
+    v1 = values[0]
+    for v2 in values[1:]:
+        np.testing.assert_almost_equal(v1.values, v2.values)
+
+
 @pytest.mark.parametrize(
     "serialize, deserialize",
     [(pickle.dumps, pickle.loads), (cloudpickle.dumps, cloudpickle.loads)],
diff --git a/tests/utils/test_score.py b/tests/utils/test_score.py
index 078775240..5423c48be 100644
--- a/tests/utils/test_score.py
+++ b/tests/utils/test_score.py
@@ -1,5 +1,7 @@
 import numpy as np
+import sklearn
 from numpy.typing import NDArray
+from packaging import version
 
 from pydvl.utils.score import Scorer, compose_score, squashed_r2, squashed_variance
 
@@ -24,7 +26,13 @@ def test_scorer():
     """Tests the Scorer class."""
     scorer = Scorer("r2")
     assert str(scorer) == "r2"
-    assert repr(scorer) == "R2 (scorer=make_scorer(r2_score))"
+    if version.parse(sklearn.__version__) >= version.parse("1.4.0"):
+        assert (
+            repr(scorer)
+            == "R2 (scorer=make_scorer(r2_score, response_method='predict'))"
+        )
+    else:
+        assert repr(scorer) == "R2 (scorer=make_scorer(r2_score))"
 
     coef = np.array([1, 2])
     X = np.array([[1, 2], [3, 4]])
diff --git a/tests/value/__init__.py b/tests/value/__init__.py
index 4b27711c4..19a703d2d 100644
--- a/tests/value/__init__.py
+++ b/tests/value/__init__.py
@@ -19,7 +19,9 @@ def check_total_value(
     Shapley value is supposed to fulfill the total value axiom."""
     total_utility = u(u.data.indices)
     # We can use relative tolerances if we don't have the range of the scorer.
-    assert np.isclose(np.sum(values.values), total_utility, rtol=rtol, atol=atol)
+    np.testing.assert_allclose(
+        np.sum(values.values), total_utility, rtol=rtol, atol=atol
+    )
 
 
 def check_exact(
@@ -33,10 +35,14 @@ def check_exact(
     values.sort()
     exact_values.sort()
 
-    assert np.all(values.indices == exact_values.indices), "Ranks do not match"
-    assert np.allclose(
-        values.values, exact_values.values, rtol=rtol, atol=atol
-    ), "Values do not match"
+    np.testing.assert_equal(values.indices, exact_values.indices, "Ranks do not match")
+    np.testing.assert_allclose(
+        values.values,
+        exact_values.values,
+        rtol=rtol,
+        atol=atol,
+        err_msg="Values do not match",
+    )
 
 
 def check_values(
@@ -66,9 +72,9 @@ def check_values(
     values.sort()
     exact_values.sort()
 
-    assert np.allclose(values.values, exact_values.values, rtol=rtol, atol=atol)
+    np.testing.assert_allclose(values.values, exact_values.values, rtol=rtol, atol=atol)
     for name in extra_values_names:
-        assert np.isclose(
+        np.testing.assert_allclose(
             getattr(values, name), getattr(exact_values, name), rtol=rtol, atol=atol
         )
 
diff --git a/tests/value/conftest.py b/tests/value/conftest.py
index 0e3c48d29..139f0f5b6 100644
--- a/tests/value/conftest.py
+++ b/tests/value/conftest.py
@@ -11,12 +11,35 @@
 from pydvl.utils.caching import InMemoryCacheBackend
 from pydvl.utils.status import Status
 from pydvl.value import ValuationResult
+from pydvl.value.games import (
+    AsymmetricVotingGame,
+    Game,
+    MinerGame,
+    ShoesGame,
+    SymmetricVotingGame,
+)
 from pydvl.value.shapley.naive import combinatorial_exact_shapley
 
 from ..conftest import num_workers
 from . import polynomial
 
 
+@pytest.fixture(scope="module")
+def test_game(request) -> Game:
+    name, kwargs = request.param  # (game name, constructor kwargs) from parametrize
+    if name == "miner":
+        game = MinerGame(n_players=kwargs["n_players"])
+    elif name == "shoes":
+        game = ShoesGame(left=kwargs["left"], right=kwargs["right"])
+    elif name == "symmetric-voting":
+        game = SymmetricVotingGame(n_players=kwargs["n_players"])
+    elif name == "asymmetric-voting":
+        game = AsymmetricVotingGame()  # kwargs are ignored; the default size is used
+    else:
+        raise ValueError(f"Unknown game '{name}'")
+    return game
+
+
 @pytest.fixture(scope="function")
 def polynomial_dataset(coefficients: np.ndarray):
     """Coefficients must be for monomials of increasing degree"""
diff --git a/tests/value/least_core/conftest.py b/tests/value/least_core/conftest.py
deleted file mode 100644
index 2355c443a..000000000
--- a/tests/value/least_core/conftest.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from typing import Tuple
-
-import numpy as np
-import pytest
-
-from pydvl.utils import Utility
-from pydvl.utils.status import Status
-from pydvl.utils.utility import GlovesGameUtility, MinerGameUtility
-from pydvl.value.result import ValuationResult
-
-
-@pytest.fixture(scope="module")
-def test_utility(request) -> Tuple[Utility, ValuationResult]:
-    name, kwargs = request.param
-    if name == "miner":
-        u = MinerGameUtility(**kwargs)
-    elif name == "gloves":
-        u = GlovesGameUtility(**kwargs)
-    else:
-        raise ValueError(f"Unknown '{name}'")
-    exact_values, subsidy = u.exact_least_core_values()
-    result = ValuationResult(
-        algorithm="exact",
-        values=exact_values,
-        subsidy=subsidy,
-        variances=np.zeros_like(exact_values),
-        data_names=np.arange(len(exact_values)),
-        status=Status.Converged,
-    )
-    return u, result
diff --git a/tests/value/least_core/test_common.py b/tests/value/least_core/test_common.py
index feadeb954..6add2d12a 100644
--- a/tests/value/least_core/test_common.py
+++ b/tests/value/least_core/test_common.py
@@ -8,29 +8,30 @@
 
 
 @pytest.mark.parametrize(
-    "test_utility",
-    [("miner", {"n_miners": 5})],
+    "test_game",
+    [("miner", {"n_players": 5})],
     indirect=True,
 )
-def test_lc_solve_problems(test_utility, n_jobs, parallel_config):
+def test_lc_solve_problems(test_game, n_jobs, parallel_config):
     """Test solving LeastCoreProblems in parallel."""
 
-    u, exact_values = test_utility
     n_problems = n_jobs
-    problem = lc_prepare_problem(u)
+    problem = lc_prepare_problem(test_game.u)
     solutions = lc_solve_problems(
         [problem] * n_problems,
-        u,
+        test_game.u,
         algorithm="test_lc",
         n_jobs=n_jobs,
         config=parallel_config,
     )
     assert len(solutions) == n_problems
 
+    exact_values = test_game.least_core_values()
+
     for solution in solutions:
         assert solution.status == Status.Converged
         check_values(solution, exact_values, rtol=0.01)
 
-        check = lc_solve_problem(problem, u=u, algorithm="test_lc")
+        check = lc_solve_problem(problem, u=test_game.u, algorithm="test_lc")
         assert check.status == Status.Converged
         check_values(solution, check, rtol=0.01)
diff --git a/tests/value/least_core/test_montecarlo.py b/tests/value/least_core/test_montecarlo.py
index 38d675e0d..8b926a3bf 100644
--- a/tests/value/least_core/test_montecarlo.py
+++ b/tests/value/least_core/test_montecarlo.py
@@ -10,28 +10,27 @@
 
 
 @pytest.mark.parametrize(
-    "test_utility, rtol, n_iterations",
+    "test_game, rtol, n_iterations",
     [
-        (("miner", {"n_miners": 8}), 0.1, 128),
-        (("gloves", {"left": 10, "right": 5}), 0.2, 10000),
+        (("miner", {"n_players": 8}), 0.1, 128),
+        (("shoes", {"left": 10, "right": 5}), 0.2, 10000),
     ],
-    indirect=["test_utility"],
+    indirect=["test_game"],
 )
 @pytest.mark.parametrize("n_jobs", [1, -1])
 @pytest.mark.parametrize("non_negative_subsidy", (True, False))
 def test_montecarlo_least_core(
-    test_utility, rtol, n_iterations, n_jobs, non_negative_subsidy, seed
+    test_game, rtol, n_iterations, n_jobs, non_negative_subsidy, seed
 ):
-    u, exact_values = test_utility
-
     values = montecarlo_least_core(
-        u,
+        test_game.u,
         n_iterations=n_iterations,
         non_negative_subsidy=non_negative_subsidy,
         progress=False,
         n_jobs=n_jobs,
         seed=seed,
     )
+    exact_values = test_game.least_core_values()
     if non_negative_subsidy:
         check_values(values, exact_values)
         # Sometimes the subsidy is negative but really close to zero
diff --git a/tests/value/least_core/test_naive.py b/tests/value/least_core/test_naive.py
index 28a79e381..a972e72c0 100644
--- a/tests/value/least_core/test_naive.py
+++ b/tests/value/least_core/test_naive.py
@@ -6,23 +6,23 @@
 
 
 @pytest.mark.parametrize(
-    "test_utility",
+    "test_game",
     [
-        ("miner", {"n_miners": 3}),
-        ("miner", {"n_miners": 4}),
-        ("gloves", {"left": 1, "right": 1}),
-        ("gloves", {"left": 2, "right": 1}),
-        ("gloves", {"left": 1, "right": 2}),
+        ("miner", {"n_players": 3}),
+        ("miner", {"n_players": 4}),
+        ("shoes", {"left": 1, "right": 1}),
+        ("shoes", {"left": 2, "right": 1}),
+        ("shoes", {"left": 1, "right": 2}),
     ],
     indirect=True,
 )
 @pytest.mark.parametrize("non_negative_subsidy", (True, False))
-def test_naive_least_core(test_utility, non_negative_subsidy):
-    u, exact_values = test_utility
+def test_naive_least_core(test_game, non_negative_subsidy):
     values = exact_least_core(
-        u, non_negative_subsidy=non_negative_subsidy, progress=False
+        test_game.u, non_negative_subsidy=non_negative_subsidy, progress=False
     )
-    check_total_value(u, values)
+    check_total_value(test_game.u, values)
+    exact_values = test_game.least_core_values()
     if non_negative_subsidy:
         check_values(values, exact_values)
         # Sometimes the subsidy is negative but really close to zero
diff --git a/tests/value/shapley/test_classwise.py b/tests/value/shapley/test_classwise.py
index bd4f55a5d..d73e86a0b 100644
--- a/tests/value/shapley/test_classwise.py
+++ b/tests/value/shapley/test_classwise.py
@@ -3,7 +3,9 @@
 import numpy as np
 import pandas as pd
 import pytest
+import sklearn
 from numpy.typing import NDArray
+from packaging import version
 
 from pydvl.utils import Dataset, Utility, powerset
 from pydvl.value import MaxChecks, ValuationResult
@@ -165,7 +167,13 @@ def test_classwise_scorer_representation():
 
     scorer = ClasswiseScorer("accuracy", initial_label=0)
     assert str(scorer) == "classwise accuracy"
-    assert repr(scorer) == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score))"
+    if version.parse(sklearn.__version__) >= version.parse("1.4.0"):
+        assert (
+            repr(scorer)
+            == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score, response_method='predict'))"
+        )
+    else:
+        assert repr(scorer) == "ClasswiseAccuracy (scorer=make_scorer(accuracy_score))"
 
 
 @pytest.mark.parametrize("n_element, left_margin, right_margin", [(101, 0.3, 0.4)])
diff --git a/tests/value/shapley/test_montecarlo.py b/tests/value/shapley/test_montecarlo.py
index ef9deed1f..58f9df2a9 100644
--- a/tests/value/shapley/test_montecarlo.py
+++ b/tests/value/shapley/test_montecarlo.py
@@ -6,7 +6,7 @@
 from sklearn.linear_model import LinearRegression
 
 from pydvl.parallel.config import ParallelConfig
-from pydvl.utils import Dataset, GroupedDataset, Status, Utility
+from pydvl.utils import GroupedDataset, Status, Utility
 from pydvl.utils.numeric import num_samples_permutation_hoeffding
 from pydvl.utils.score import Scorer, squashed_r2
 from pydvl.utils.types import Seed
@@ -21,35 +21,38 @@
 log = logging.getLogger(__name__)
 
 
-# noinspection PyTestParametrized
 @pytest.mark.parametrize(
-    "num_samples, fun, rtol, atol, kwargs",
+    "test_game",
     [
-        (12, ShapleyMode.PermutationMontecarlo, 0.1, 1e-5, {"done": MaxUpdates(10)}),
-        # FIXME! it should be enough with 2**(len(data)-1) samples
+        ("symmetric-voting", {"n_players": 6}),
+        ("shoes", {"left": 3, "right": 4}),
+    ],
+    indirect=["test_game"],
+)
+@pytest.mark.parametrize(
+    "fun, rtol, atol, kwargs",
+    [
+        (ShapleyMode.PermutationMontecarlo, 0.2, 1e-4, dict(done=MaxUpdates(500))),
         (
-            8,
             ShapleyMode.CombinatorialMontecarlo,
             0.2,
             1e-4,
-            {"done": MaxUpdates(2**10)},
+            dict(done=MaxUpdates(2**10)),
         ),
-        (12, ShapleyMode.Owen, 0.1, 1e-4, dict(n_samples=4, max_q=200)),
-        (12, ShapleyMode.OwenAntithetic, 0.1, 1e-4, dict(n_samples=4, max_q=200)),
+        (ShapleyMode.Owen, 0.2, 1e-4, dict(n_samples=5, max_q=200)),
+        (ShapleyMode.OwenAntithetic, 0.1, 1e-4, dict(n_samples=5, max_q=200)),
+        # Because of the inaccuracy of GroupTesting, a high atol is required for the
+        # value 0, for which the rtol has no effect.
         (
-            3,
             ShapleyMode.GroupTesting,
             0.1,
-            # Because of the inaccuracy of GTS, a high atol is required for the
-            # value 0, for which the rtol has no effect.
             1e-2,
             dict(n_samples=int(4e4), epsilon=0.2, delta=0.01),
         ),
     ],
 )
-def test_analytic_montecarlo_shapley(
-    num_samples,
-    analytic_shapley,
+def test_games(
+    test_game,
     parallel_config,
     n_jobs,
     fun: ShapleyMode,
@@ -58,10 +61,22 @@ def test_analytic_montecarlo_shapley(
     kwargs: dict,
     seed,
 ):
-    u, exact_values = analytic_shapley
+    """Tests values for all methods using toy games.
+
+    For permutation, the rtol for each scorer is chosen
+    so that the number of samples selected is just above the (ε,δ) bound for ε =
+    rtol, δ=0.001 and the range corresponding to each score. This means that
+    roughly once every 1000/num_methods runs the test will fail.
+
+    FIXME:
+     - We don't have a bound for Owen.
+    NOTE:
+     - The variance in the combinatorial method is huge, so we need lots of
+       samples
 
+    """
     values = compute_shapley_values(
-        u,
+        test_game.u,
         mode=fun,
         n_jobs=n_jobs,
         config=parallel_config,
@@ -70,29 +85,31 @@ def test_analytic_montecarlo_shapley(
         **kwargs
     )
 
+    exact_values = test_game.shapley_values()
     check_values(values, exact_values, rtol=rtol, atol=atol)
 
 
 @pytest.mark.slow
 @pytest.mark.parametrize(
-    "num_samples, fun, kwargs",
+    "test_game",
+    [
+        ("symmetric-voting", {"n_players": 12}),
+    ],
+    indirect=["test_game"],
+)
+@pytest.mark.parametrize(
+    "fun, kwargs",
     [
         # TODO Add once issue #416 is closed.
-        # (12, ShapleyMode.PermutationMontecarlo, {"done": MaxChecks(1)}),
-        (
-            12,
-            ShapleyMode.CombinatorialMontecarlo,
-            {"done": MaxChecks(4)},
-        ),
-        (12, ShapleyMode.Owen, dict(n_samples=4, max_q=200)),
-        (12, ShapleyMode.OwenAntithetic, dict(n_samples=4, max_q=200)),
-        (4, ShapleyMode.GroupTesting, dict(n_samples=21, epsilon=0.2, delta=0.01)),
+        # (ShapleyMode.PermutationMontecarlo, dict(done=MaxChecks(1))),
+        (ShapleyMode.CombinatorialMontecarlo, dict(done=MaxChecks(4))),
+        (ShapleyMode.Owen, dict(n_samples=4, max_q=200)),
+        (ShapleyMode.OwenAntithetic, dict(n_samples=4, max_q=200)),
+        (ShapleyMode.GroupTesting, dict(n_samples=21, epsilon=0.2, delta=0.01)),
     ],
 )
-@pytest.mark.parametrize("num_points, num_features", [(12, 3)])
-def test_montecarlo_shapley_housing_dataset(
-    num_samples: int,
-    housing_dataset: Dataset,
+def test_seed(
+    test_game,
     parallel_config: ParallelConfig,
     n_jobs: int,
     fun: ShapleyMode,
@@ -102,11 +119,10 @@ def test_montecarlo_shapley_housing_dataset(
 ):
     values_1, values_2, values_3 = call_with_seeds(
         compute_shapley_values,
-        Utility(LinearRegression(), data=housing_dataset, scorer="r2"),
+        test_game.u,
         mode=fun,
         n_jobs=n_jobs,
         config=parallel_config,
-        progress=False,
         seeds=(seed, seed, seed_alt),
         **deepcopy(kwargs)
     )
@@ -143,62 +159,6 @@ def test_hoeffding_bound_montecarlo(
             check_rank_correlation(values, exact_values, threshold=0.8)
 
 
-@pytest.mark.parametrize(
-    "a, b, num_points", [(2, 0, 21)]  # training set will have 0.3 * 21 = 6 samples
-)
-@pytest.mark.parametrize("scorer, rtol", [(squashed_r2, 0.25)])
-@pytest.mark.parametrize(
-    "fun, kwargs",
-    [
-        # FIXME: Hoeffding says 400 should be enough
-        (ShapleyMode.PermutationMontecarlo, dict(done=MaxUpdates(500))),
-        (ShapleyMode.CombinatorialMontecarlo, dict(done=MaxUpdates(2**11))),
-        (ShapleyMode.Owen, dict(n_samples=2, max_q=300)),
-        (ShapleyMode.OwenAntithetic, dict(n_samples=2, max_q=300)),
-        pytest.param(
-            ShapleyMode.GroupTesting,
-            dict(n_samples=int(5e4), epsilon=0.25, delta=0.1),
-            marks=pytest.mark.slow,
-        ),
-    ],
-)
-def test_linear_montecarlo_shapley(
-    linear_shapley,
-    n_jobs,
-    memcache_client_config,
-    scorer: Scorer,
-    rtol: float,
-    fun: ShapleyMode,
-    kwargs: dict,
-    seed: int,
-):
-    """Tests values for all methods using a linear dataset.
-
-    For permutation and truncated montecarlo, the rtol for each scorer is chosen
-    so that the number of samples selected is just above the (ε,δ) bound for ε =
-    rtol, δ=0.001 and the range corresponding to each score. This means that
-    roughly once every 1000/num_methods runs the test will fail.
-
-    FIXME:
-     - For permutation, we must increase the number of samples above that what
-       is done for truncated, this is probably due to the averaging done by the
-       latter to reduce variance
-     - We don't have a bound for Owen.
-    NOTE:
-     - The variance in the combinatorial method is huge, so we need lots of
-       samples
-
-    """
-    u, exact_values = linear_shapley
-
-    values = compute_shapley_values(
-        u, mode=fun, progress=False, n_jobs=n_jobs, seed=seed, **kwargs
-    )
-
-    check_values(values, exact_values, rtol=rtol)
-    check_total_value(u, values, rtol=rtol)  # FIXME, could be more than rtol
-
-
 @pytest.mark.slow
 @pytest.mark.parametrize(
     "a, b, num_points", [(2, 0, 21)]  # training set will have 0.3 * 21 ~= 6 samples
diff --git a/tests/value/shapley/test_naive.py b/tests/value/shapley/test_naive.py
index 45c32b1a9..98a18a626 100644
--- a/tests/value/shapley/test_naive.py
+++ b/tests/value/shapley/test_naive.py
@@ -15,55 +15,26 @@
 log = logging.getLogger(__name__)
 
 
-# noinspection PyTestParametrized
 @pytest.mark.parametrize(
-    "num_samples, fun, rtol, total_atol",
+    "test_game, rtol, total_atol",
     [
-        (12, combinatorial_exact_shapley, 0.01, 1e-5),
-        (6, permutation_exact_shapley, 0.01, 1e-5),
+        (("symmetric-voting", {"n_players": 4}), 0.1, 1e-5),
+        (("shoes", {"left": 1, "right": 1}), 0.1, 1e-5),
+        (("shoes", {"left": 2, "right": 1}), 0.1, 1e-5),
+        (("shoes", {"left": 1, "right": 2}), 0.1, 1e-5),
+        (("shoes", {"left": 2, "right": 4}), 0.1, 1e-5),
     ],
+    indirect=["test_game"],
 )
-def test_analytic_exact_shapley(num_samples, analytic_shapley, fun, rtol, total_atol):
-    """Compares the combinatorial exact shapley and permutation exact shapley with
-    the analytic_shapley calculation for a dummy model.
-    """
-    u, exact_values = analytic_shapley
-    values_p = fun(u, progress=False)
-    check_total_value(u, values_p, atol=total_atol)
-    check_values(values_p, exact_values, rtol=rtol)
-
-
 @pytest.mark.parametrize(
-    "a, b, num_points, scorer",
-    [
-        (2, 0, 10, "r2"),
-        (2, 1, 10, "r2"),
-        (2, 1, 10, "neg_median_absolute_error"),
-        (2, 1, 10, "explained_variance"),
-    ],
+    "fun",
+    [combinatorial_exact_shapley, permutation_exact_shapley],
 )
-def test_linear(
-    linear_dataset,
-    memcache_client_config,
-    scorer,
-    cache_backend,
-    rtol=0.01,
-    total_atol=1e-5,
-):
-    linear_utility = Utility(
-        LinearRegression(),
-        data=linear_dataset,
-        scorer=scorer,
-        cache_backend=cache_backend,
-    )
-
-    values_combinatorial = combinatorial_exact_shapley(linear_utility, progress=False)
-    check_total_value(linear_utility, values_combinatorial, atol=total_atol)
-
-    values_permutation = permutation_exact_shapley(linear_utility, progress=False)
-    check_total_value(linear_utility, values_permutation, atol=total_atol)
-
-    check_values(values_combinatorial, values_permutation, rtol=rtol)
+def test_games(fun, test_game, rtol, total_atol):
+    values_p = fun(test_game.u)
+    exact_values = test_game.shapley_values()
+    check_total_value(test_game.u, values_p, atol=total_atol)
+    check_values(values_p, exact_values, rtol=rtol)
 
 
 @pytest.mark.parametrize(
@@ -73,7 +44,6 @@ def test_linear(
 def test_grouped_linear(
     linear_dataset,
     num_groups,
-    memcache_client_config,
     scorer,
     cache_backend,
     rtol=0.01,
@@ -112,9 +82,7 @@ def test_grouped_linear(
         (2, 1, 20, "r2"),
     ],
 )
-def test_linear_with_outlier(
-    linear_dataset, memcache_client_config, scorer, cache_backend, total_atol=1e-5
-):
+def test_linear_with_outlier(linear_dataset, scorer, cache_backend, total_atol=1e-5):
     outlier_idx = np.random.randint(len(linear_dataset.y_train))
     linear_dataset.y_train[outlier_idx] -= 100
     linear_utility = Utility(
@@ -173,7 +141,6 @@ def test_polynomial(
 def test_polynomial_with_outlier(
     polynomial_dataset,
     polynomial_pipeline,
-    memcache_client_config,
     scorer,
     cache_backend,
     total_atol=1e-5,
diff --git a/tests/value/shapley/test_truncated.py b/tests/value/shapley/test_truncated.py
index ac980ab96..7d5977216 100644
--- a/tests/value/shapley/test_truncated.py
+++ b/tests/value/shapley/test_truncated.py
@@ -8,7 +8,7 @@
 from pydvl.utils.score import Scorer, squashed_r2
 from pydvl.value import compute_shapley_values
 from pydvl.value.shapley import ShapleyMode
-from pydvl.value.shapley.truncated import NoTruncation
+from pydvl.value.shapley.truncated import FixedTruncation, NoTruncation
 from pydvl.value.stopping import HistoryDeviation, MaxUpdates
 
 from .. import check_total_value, check_values
@@ -16,92 +16,49 @@
 log = logging.getLogger(__name__)
 
 
-# noinspection PyTestParametrized
 @pytest.mark.parametrize(
-    "num_samples, fun, rtol, atol, kwargs",
+    "test_game",
     [
-        (
-            12,
-            ShapleyMode.TruncatedMontecarlo,
-            0.1,
-            1e-5,
-            dict(
-                done=MaxUpdates(500),
-                truncation=NoTruncation(),
-            ),
-        ),
+        ("symmetric-voting", {"n_players": 6}),
+        ("shoes", {"left": 3, "right": 4}),
     ],
+    indirect=["test_game"],
 )
-def test_tmcs_analytic_montecarlo_shapley(
-    num_samples,
-    analytic_shapley,
-    parallel_config,
-    n_jobs,
-    fun: ShapleyMode,
-    rtol: float,
-    atol: float,
-    kwargs: dict,
-):
-    u, exact_values = analytic_shapley
-
-    values = compute_shapley_values(
-        u, mode=fun, n_jobs=n_jobs, config=parallel_config, progress=False, **kwargs
-    )
-
-    check_values(values, exact_values, rtol=rtol, atol=atol)
-
-
 @pytest.mark.parametrize(
-    "a, b, num_points", [(2, 0, 21)]  # training set will have 0.3 * 21 = 6 samples
-)
-@pytest.mark.parametrize("scorer, rtol", [(squashed_r2, 0.25)])
-@pytest.mark.parametrize(
-    "fun, kwargs",
+    "done, truncation_cls, truncation_kwargs",
     [
-        (
-            ShapleyMode.TruncatedMontecarlo,
-            dict(
-                done=MaxUpdates(500),
-                truncation=NoTruncation(),
-            ),
-        ),
+        (MaxUpdates(600), NoTruncation, dict()),
+        (MaxUpdates(600), FixedTruncation, dict(fraction=0.9)),
     ],
 )
-def test_tmcs_linear_montecarlo_shapley(
-    linear_shapley,
+def test_games(
+    test_game,
+    parallel_config,
     n_jobs,
-    memcache_client_config,
-    scorer: Scorer,
-    rtol: float,
-    fun: ShapleyMode,
-    kwargs: dict,
+    done,
+    truncation_cls,
+    truncation_kwargs,
+    seed,
 ):
-    """Tests values for all methods using a linear dataset.
-
-    For permutation and truncated montecarlo, the rtol for each scorer is chosen
-    so that the number of samples selected is just above the (ε,δ) bound for ε =
-    rtol, δ=0.001 and the range corresponding to each score. This means that
-    roughly once every 1000/num_methods runs the test will fail.
-
-    FIXME:
-     - For permutation, we must increase the number of samples above that what
-       is done for truncated, this is probably due to the averaging done by the
-       latter to reduce variance
-     - We don't have a bound for Owen.
-    NOTE:
-     - The variance in the combinatorial method is huge, so we need lots of
-       samples
-
-    """
-    u, exact_values = linear_shapley
-    check_total_value(u, exact_values, rtol=rtol)
+    try:
+        truncation = truncation_cls(test_game.u, **truncation_kwargs)
+    except TypeError:
+        # The NoTruncation class's constructor doesn't take any arguments
+        truncation = truncation_cls(**truncation_kwargs)
 
     values = compute_shapley_values(
-        u, mode=fun, progress=False, n_jobs=n_jobs, **kwargs
+        test_game.u,
+        mode=ShapleyMode.TruncatedMontecarlo,
+        done=done,
+        truncation=truncation,
+        n_jobs=n_jobs,
+        config=parallel_config,
+        seed=seed,
+        progress=True,
     )
 
-    check_values(values, exact_values, rtol=rtol)
-    check_total_value(u, values, rtol=rtol)  # FIXME, could be more than rtol
+    exact_values = test_game.shapley_values()
+    check_values(values, exact_values, rtol=0.2, atol=1e-4)
 
 
 @pytest.mark.parametrize(
diff --git a/tests/value/test_semivalues.py b/tests/value/test_semivalues.py
index 50a0201b7..e33f92543 100644
--- a/tests/value/test_semivalues.py
+++ b/tests/value/test_semivalues.py
@@ -1,4 +1,5 @@
 import math
+from itertools import islice
 from typing import Type
 
 import numpy as np
@@ -17,6 +18,7 @@
 )
 from pydvl.value.semivalues import (
     SVCoefficient,
+    _marginal,
     banzhaf_coefficient,
     beta_coefficient,
     compute_generic_semivalues,
@@ -28,12 +30,112 @@
 from .utils import timed
 
 
-@pytest.mark.parametrize("num_samples", [5])
+@pytest.mark.parametrize(
+    "test_game",
+    [
+        ("shoes", {"left": 3, "right": 2}),
+    ],
+    indirect=["test_game"],
+)
+@pytest.mark.parametrize(
+    "sampler, coefficient, batch_size",
+    [(PermutationSampler, beta_coefficient(1, 1), 5)],
+)
+def test_marginal_batch_size(test_game, sampler, coefficient, batch_size, seed):
+    # TODO: This test is probably not needed: it was added and then turned
+    # out not to check much. The only difference between the two calls is
+    # that for the first one the loop over the samples runs outside
+    # _marginal, while for the second one it runs inside.
+    sampler_it = iter(sampler(test_game.u.data.indices, seed=seed))
+    samples = tuple(islice(sampler_it, batch_size))
+
+    marginals_single = []
+    for sample in samples:
+        marginals_single.extend(
+            _marginal(test_game.u, coefficient=coefficient, samples=[sample])
+        )
+
+    marginals_batch = _marginal(test_game.u, coefficient=coefficient, samples=samples)
+
+    assert len(marginals_single) == len(marginals_batch)
+    assert set(marginals_single) == set(marginals_batch)
+
+
+@pytest.mark.parametrize("n", [10, 100])
+@pytest.mark.parametrize(
+    "coefficient",
+    [
+        beta_coefficient(1, 1),
+        beta_coefficient(1, 16),
+        beta_coefficient(4, 1),
+        banzhaf_coefficient,
+        shapley_coefficient,
+    ],
+)
+def test_coefficients(n: int, coefficient: SVCoefficient):
+    r"""Coefficients for semi-values must fulfill:
+
+    $$ \sum_{j=1}^{n}\binom{n-1}{j-1}w^{(n)}(j) = 1 $$
+
+    Note that we depart from the usual definitions by including the factor $1/n$
+    in the shapley and beta coefficients.
+    """
+    s = [math.comb(n - 1, j - 1) * coefficient(n, j - 1) for j in range(1, n + 1)]
+    assert np.isclose(1, np.sum(s))
+
+
+@pytest.mark.parametrize(
+    "test_game",
+    [
+        ("symmetric-voting", {"n_players": 4}),
+        ("shoes", {"left": 1, "right": 1}),
+        ("shoes", {"left": 2, "right": 1}),
+        ("shoes", {"left": 1, "right": 2}),
+    ],
+    indirect=["test_game"],
+)
 @pytest.mark.parametrize(
     "sampler",
     [
         DeterministicUniformSampler,
         DeterministicPermutationSampler,
+    ],
+)
+@pytest.mark.parametrize("coefficient", [shapley_coefficient, beta_coefficient(1, 1)])
+def test_games_shapley_deterministic(
+    test_game,
+    parallel_config,
+    n_jobs,
+    sampler: Type[PowersetSampler],
+    coefficient: SVCoefficient,
+    seed: Seed,
+):
+    criterion = MaxUpdates(50)
+    values = compute_generic_semivalues(
+        sampler(test_game.u.data.indices, seed=seed),
+        test_game.u,
+        coefficient,
+        criterion,
+        skip_converged=True,
+        n_jobs=n_jobs,
+        config=parallel_config,
+        progress=True,
+    )
+    exact_values = test_game.shapley_values()
+    check_values(values, exact_values, rtol=0.1)
+
+
+@pytest.mark.parametrize(
+    "test_game",
+    [
+        ("symmetric-voting", {"n_players": 6}),
+        ("shoes", {"left": 3, "right": 2}),
+    ],
+    indirect=["test_game"],
+)
+@pytest.mark.parametrize(
+    "sampler",
+    [
         UniformSampler,
         PermutationSampler,
         pytest.param(AntitheticSampler, marks=pytest.mark.slow),
@@ -41,36 +143,55 @@
     ],
 )
 @pytest.mark.parametrize("coefficient", [shapley_coefficient, beta_coefficient(1, 1)])
-def test_shapley(
-    num_samples: int,
-    analytic_shapley,
+def test_games_shapley(
+    test_game,
+    parallel_config,
+    n_jobs,
     sampler: Type[PowersetSampler],
     coefficient: SVCoefficient,
-    n_jobs: int,
-    parallel_config: ParallelConfig,
     seed: Seed,
 ):
-    u, exact_values = analytic_shapley
-    criterion = HistoryDeviation(50, 1e-3) | MaxUpdates(1000)
+    criterion = HistoryDeviation(50, 1e-4) | MaxUpdates(500)
     values = compute_generic_semivalues(
-        sampler(u.data.indices, seed=seed),
-        u,
+        sampler(test_game.u.data.indices, seed=seed),
+        test_game.u,
         coefficient,
         criterion,
         skip_converged=True,
         n_jobs=n_jobs,
         config=parallel_config,
+        progress=True,
     )
+
+    exact_values = test_game.shapley_values()
     check_values(values, exact_values, rtol=0.2)
 
 
 @pytest.mark.parametrize(
-    "num_samples,sampler,coefficient,batch_size",
-    [(5, PermutationSampler, beta_coefficient(1, 1), 5)],
+    "test_game",
+    [
+        ("shoes", {"left": 3, "right": 2}),
+    ],
+    indirect=["test_game"],
+)
+@pytest.mark.parametrize(
+    "sampler, coefficient, batch_size",
+    [(PermutationSampler, beta_coefficient(1, 1), 5)],
+)
+@pytest.mark.parametrize(
+    "n_jobs",
+    [
+        1,
+        pytest.param(
+            2,
+            marks=pytest.mark.xfail(
+                reason="Bad interaction between parallelization and batching"
+            ),
+        ),
+    ],
 )
 def test_shapley_batch_size(
-    num_samples: int,
-    analytic_shapley,
+    test_game,
     sampler: Type[PermutationSampler],
     coefficient: SVCoefficient,
     batch_size: int,
@@ -78,13 +199,12 @@ def test_shapley_batch_size(
     parallel_config: ParallelConfig,
     seed: Seed,
 ):
-    u, exact_values = analytic_shapley
     timed_fn = timed(compute_generic_semivalues)
     result_single_batch = timed_fn(
-        sampler(u.data.indices, seed=seed),
-        u,
+        sampler(test_game.u.data.indices, seed=seed),
+        test_game.u,
         coefficient,
-        done=HistoryDeviation(50, 1e-3) | MaxUpdates(1000),
+        done=MaxUpdates(100),
         skip_converged=True,
         n_jobs=n_jobs,
         batch_size=1,
@@ -93,10 +213,10 @@ def test_shapley_batch_size(
     total_seconds_single_batch = timed_fn.execution_time
 
     result_multi_batch = timed_fn(
-        sampler(u.data.indices, seed=seed),
-        u,
+        sampler(test_game.u.data.indices, seed=seed),
+        test_game.u,
         coefficient,
-        done=HistoryDeviation(50, 1e-3) | MaxUpdates(1000),
+        done=MaxUpdates(100),
         skip_converged=True,
         n_jobs=n_jobs,
         batch_size=batch_size,
@@ -141,26 +261,3 @@ def test_banzhaf(
         config=parallel_config,
     )
     check_values(values, exact_values, rtol=0.2)
-
-
-@pytest.mark.parametrize("n", [10, 100])
-@pytest.mark.parametrize(
-    "coefficient",
-    [
-        beta_coefficient(1, 1),
-        beta_coefficient(1, 16),
-        beta_coefficient(4, 1),
-        banzhaf_coefficient,
-        shapley_coefficient,
-    ],
-)
-def test_coefficients(n: int, coefficient: SVCoefficient):
-    r"""Coefficients for semi-values must fulfill:
-
-    $$ \sum_{i=1}^{n}\choose{n-1}{j-1}w^{(n)}(j) = 1 $$
-
-    Note that we depart from the usual definitions by including the factor $1/n$
-    in the shapley and beta coefficients.
-    """
-    s = [math.comb(n - 1, j - 1) * coefficient(n, j - 1) for j in range(1, n + 1)]
-    assert np.isclose(1, np.sum(s))