diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9ea7665a..251bc0f6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -5,6 +5,14 @@ on: push: branches: [ master ] +concurrency: + # github.workflow: name of the workflow + # github.event.pull_request.number || github.ref: pull request number or branch name if not a pull request + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + + # Cancel in-progress runs when a new workflow with the same group name is triggered + cancel-in-progress: true + jobs: ci: runs-on: ubuntu-latest @@ -24,12 +32,13 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.10" + cache: "pip" - name: Pre-commit checks uses: pre-commit/action@v3.0.1 - name: Install package & dependencies - run: pip install --editable '.[dev, docs, tests, examples]' + run: pip install '.[tests]' - name: Launch tests & generate coverage report run: coverage run -m pytest tests @@ -39,6 +48,10 @@ jobs: uses: py-cov-action/python-coverage-comment-action@v3 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # If the coverage percentage is above or equal to this value, the badge will be green. + MINIMUM_GREEN: 90 + # If the coverage percentage is below this value, the badge will be red. + MINIMUM_ORANGE: 80 - name: Store Pull Request comment to be posted uses: actions/upload-artifact@v4 diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index 6e24509e..bdc7bdb9 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -22,9 +22,12 @@ jobs: # artifact that contains the comment to be published actions: read steps: - - name: Post comment uses: py-cov-action/python-coverage-comment-action@v3 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_RUN_ID: ${{ github.event.workflow_run.id }} + # If the coverage percentage is above or equal to this value, the badge will be green. 
+ MINIMUM_GREEN: 90 + # If the coverage percentage is below this value, the badge will be red. + MINIMUM_ORANGE: 80 diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index a00ff0fc..9d818f8e 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -4,7 +4,7 @@ on: workflow_dispatch: push: tags: - - v.** + - 'v[0-9]+\.[0-9]+\.[0-9]+' jobs: publish: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 00000000..a42b21ff --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,48 @@ +name: Run tests + +on: + pull_request: + push: + branches: [ master ] + +concurrency: + # github.workflow: name of the workflow + # github.event.pull_request.number || github.ref: pull request number or branch name if not a pull request + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + + # Cancel in-progress runs when a new workflow with the same group name is triggered + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.9", "3.10", "3.11" ] + TORCH_VERSION: [ "1.13.1", "2.0.1", "2.1.2", "2.2.2", "2.3.1", "2.4.0" ] + include: + - python-version: "3.12" + TORCH_VERSION: "2.4.0" + exclude: + - python-version: "3.11" + TORCH_VERSION: "1.13.1" + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + - name: Install dependencies and PyTorch ${{ matrix.TORCH_VERSION }} + run: | + pip install --upgrade pip + pip install torch==${{ matrix.TORCH_VERSION }} '.[tests]' + + - name: Launch tests + # Only run the unit tests, not the pipeline tests. + # Pipeline tests are too expensive to run for every python/PyTorch version. 
+ # However, they are run as part of the coverage job in the CI workflow + run: pytest --ignore=tests/pipeline tests diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0aac89dc..6a14464c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: files: \.py$ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.6 + rev: v0.5.7 hooks: - id: ruff types_or: [ python, pyi, jupyter ] diff --git a/CITATION.cff b/CITATION.cff index 02fb739a..fff6f20e 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -7,6 +7,8 @@ authors: given-names: "Juan" - family-names: "Hashemizadeh" given-names: "Meraj" + - family-names: "Lacoste-Julien" + given-names: "Simon" title: "Cooper: a toolkit for Lagrangian-based constrained optimization" date-released: 2022-03-15 url: "https://github.com/cooper-org/cooper" diff --git a/Makefile b/Makefile deleted file mode 100644 index 46f3d1ef..00000000 --- a/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -.PHONY: clean clean-build clean-pyc clean-test coverage dist docs help install lint lint/flake8 lint/black -.DEFAULT_GOAL := help - -define BROWSER_PYSCRIPT -import os, webbrowser, sys - -from urllib.request import pathname2url - -webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) -endef -export BROWSER_PYSCRIPT - -define PRINT_HELP_PYSCRIPT -import re, sys - -for line in sys.stdin: - match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) - if match: - target, help = match.groups() - print("%-20s %s" % (target, help)) -endef -export PRINT_HELP_PYSCRIPT - -BROWSER := python -c "$$BROWSER_PYSCRIPT" - -help: - @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) - -clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts - -clean-build: ## remove build artifacts - rm -fr build/ - rm -fr dist/ - rm -fr .eggs/ - find . -name '*.egg-info' -exec rm -fr {} + - find . 
-name '*.egg' -exec rm -f {} + - -clean-pyc: ## remove Python file artifacts - find . -name '*.pyc' -exec rm -f {} + - find . -name '*.pyo' -exec rm -f {} + - find . -name '*~' -exec rm -f {} + - find . -name '__pycache__' -exec rm -fr {} + - -clean-test: ## remove test and coverage artifacts - rm -fr .tox/ - rm -f .coverage - rm -fr htmlcov/ - rm -fr .pytest_cache - -lint/flake8: ## check style with flake8 - flake8 cooper tests -lint/black: ## check style with black - black --check cooper tests - -lint: lint/flake8 lint/black ## check style - -test: ## run tests quickly with the default Python - pytest - -test-all: ## run tests on every Python version with tox - tox - -coverage: ## check code coverage quickly with the default Python - coverage run --source cooper -m pytest - coverage report -m - coverage html - $(BROWSER) htmlcov/index.html - -docs: ## generate Sphinx HTML documentation, including API docs - rm -f docs/cooper.rst - rm -f docs/modules.rst - sphinx-apidoc -o docs/ cooper - $(MAKE) -C docs clean - $(MAKE) -C docs html - $(BROWSER) docs/_build/html/index.html - -servedocs: docs ## compile the docs watching for changes - watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 
- -release: dist ## package and upload a release - twine upload dist/* - -dist: clean ## builds source and wheel package - python setup.py sdist - python setup.py bdist_wheel - ls -l dist - -install: clean ## install the package to the active Python's site-packages - python setup.py install diff --git a/README.md b/README.md index 537778cf..433922a1 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,17 @@ # Cooper -[![LICENSE](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/cooper-org/cooper/tree/master/LICENSE) +[![LICENSE](https://img.shields.io/pypi/l/cooper-optim)](https://github.com/cooper-org/cooper/tree/master/LICENSE) +[![Version](https://img.shields.io/pypi/v/cooper-optim?label=version)](https://pypi.python.org/pypi/cooper-optim) +[![Downloads](https://static.pepy.tech/badge/cooper-optim)](https://pypi.python.org/pypi/cooper-optim) +[![Python](https://img.shields.io/pypi/pyversions/cooper-optim)](https://pypi.python.org/pypi/cooper-optim) [![DOCS](https://readthedocs.org/projects/cooper/badge/?version=latest)](https://cooper.readthedocs.io/en/latest/?version=latest) -[![Build and Test](https://github.com/cooper-org/cooper/actions/workflows/build.yml/badge.svg)](https://github.com/cooper-org/cooper/actions/workflows/build.yml) -[![Coverage](https://codecov.io/gh/cooper-org/cooper/graph/badge.svg?token=4U41P8JCE1)](https://codecov.io/gh/cooper-org/cooper) -[![HitCount](https://hits.dwyl.com/cooper-org/cooper.svg?style=flat-square)](https://cooper.readthedocs.io/en/latest/?version=latest) +[![Coverage badge](https://raw.githubusercontent.com/cooper-org/cooper/python-coverage-comment-action-data/badge.svg)](https://github.com/cooper-org/cooper/tree/python-coverage-comment-action-data) +[![Continuous Integration](https://github.com/cooper-org/cooper/actions/workflows/ci.yaml/badge.svg)](https://github.com/cooper-org/cooper/actions/workflows/ci.yaml) 
+[![Stars](https://img.shields.io/github/stars/cooper-org/cooper)](https://github.com/cooper-org/cooper) +[![HitCount](https://img.shields.io/endpoint?url=https://hits.dwyl.com/cooper-org/cooper.json&color=brightgreen)](https://cooper.readthedocs.io/en/latest/?version=latest) [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/cooper-org/cooper/issues) +[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?logo=discord&logoColor=white)](https://discord.gg/Aq5PjH8m6E) +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) ## About @@ -103,7 +109,7 @@ for epoch_num in range(50): ### Basic Installation ```bash -pip install git+https://github.com/cooper-org/cooper.git +pip install cooper-optim ``` ### Development Installation @@ -111,12 +117,13 @@ pip install git+https://github.com/cooper-org/cooper.git First, clone the [repository](https://github.com/cooper-org/cooper), navigate to the **Cooper** root directory and install the package in development mode by running: -| Setting | Command | Notes | -| ----------- | ---------------------------------------- | ----------------------------------------- | -| Development | `pip install --editable ".[dev, tests]"` | Editable mode. Matches test environment. | -| Docs | `pip install --editable ".[docs]"` | Used to re-generate the documentation. | -| Tutorials | `pip install --editable ".[examples]"` | Install dependencies for running examples | -| No Tests | `pip install --editable .` | Editable mode, without tests. | +| Setting | Command | Notes | +|-------------|-----------------------------------------|-------------------------------------------------| +| No Tests | `pip install --editable .` | Editable mode, without tests. | +| Development | `pip install --editable ".[tests]"` | Editable mode. Matches test environment. 
| +| Development | `pip install --editable ".[dev]"` | Editable mode. Matches development environment. | +| Tutorials | `pip install --editable ".[notebooks]"` | Install dependencies for running notebooks. | +| Docs | `pip install --editable ".[docs]"` | Used to generate the documentation. | ## Package structure @@ -172,7 +179,7 @@ the snippet below: ```bibtex @misc{gallegoPosada2022cooper, - author={Gallego-Posada, Jose and Ramirez, Juan}, + author={Gallego-Posada, Jose and Ramirez, Juan and Hashemizadeh, Meraj and Lacoste-Julien, Simon}, title={Cooper: a toolkit for Lagrangian-based constrained optimization}, howpublished={\url{https://github.com/cooper-org/cooper}}, year={2022} diff --git a/docs/source/additional_features.md b/docs/source/additional_features.md index f4ae7bfb..e36ed7b0 100644 --- a/docs/source/additional_features.md +++ b/docs/source/additional_features.md @@ -8,8 +8,6 @@ In this section we provide details on using "advanced features" such as alternating updates, or the Augmented Lagrangian method, in conjunction with a {py:class}`~cooper.optim.constrained_optimizers.ConstrainedOptimizer`. -______________________________________________________________________ - (alternating-updates)= ## Alternating updates @@ -25,21 +23,19 @@ variables. This two-stage process is handled by **Cooper** inside the One can perform alternating updates in which the primal parameters are updated first. We refer to this update strategy as `cooper.optim.AlternationType.PRIMAL_DUAL`. -.. math: -``` +$$ x_{t+1} &= \texttt{primal_optimizers_update} \left( x_{t}, \nabla_{x} \mathcal{L}_{c_t}(x, \lambda_t)|_{x=x_t} \right)\\ \lambda_{t+1} &= \texttt{dual_optimizer_update} \left( \lambda_{t}, {\color{red} \mathbf{-}} \nabla_{\lambda} \mathcal{L}({\color{red} x_{t+1}}, \lambda)|_{\lambda=\lambda_t} \right) -``` +$$ Alternative, `cooper.optim.AlternationType.DUAL_PRIMAL` carries out an update of the dual parameters first. -.. 
math: -``` +$$ \lambda_{t+1} &= \texttt{dual_optimizer_update} \left( \lambda_{t}, {\color{red} \mathbf{-}} \nabla_{\lambda} \mathcal{L}({\color{red} x_{t}}, \lambda)|_{\lambda=\lambda_t} \right) \\ x_{t+1} &= \texttt{primal_optimizers_update} \left( x_{t}, \nabla_{x} \mathcal{L}_{c_t}(x, \lambda_{t+1})|_{x=x_t} \right) -``` +$$ :::{important} Selecting `alternation_type=AlternationType.DualPrimal` does not double the number @@ -57,8 +53,6 @@ allows for updating the Lagrange multiplier without having to re-evaluate the loss function, but rather only the constraints. ::: -______________________________________________________________________ - (augmented-lagrangian-const-opt)= ## Augmented Lagrangian method @@ -172,8 +166,6 @@ for step_id in range(1000): coop.dual_scheduler.step() ``` -______________________________________________________________________ - (multiple-primal-optimizers)= # Multiple primal optimizers diff --git a/docs/source/constrained_optimizer.md b/docs/source/constrained_optimizer.md index 5b3ced5c..412d5f9b 100644 --- a/docs/source/constrained_optimizer.md +++ b/docs/source/constrained_optimizer.md @@ -211,8 +211,7 @@ will be equivalent to performing `optimizer.step()` on all of the primal parameters. ::: -```{eval-rst} -.. include:: additional_features.rst +```{include} additional_features.md ``` ```{eval-rst} diff --git a/docs/source/lagrangian_formulation.md b/docs/source/lagrangian_formulation.md index 3796cc6e..755b2819 100644 --- a/docs/source/lagrangian_formulation.md +++ b/docs/source/lagrangian_formulation.md @@ -90,7 +90,6 @@ in Fig 1. of {cite:t}`cotter2019JMLR`. ```{eval-rst} .. autoclass:: LagrangianFormulation :members: - ``` ```{eval-rst} @@ -144,8 +143,6 @@ in **Cooper** as a ```{eval-rst} .. 
autoclass:: AugmentedLagrangianFormulation :members: - - ``` ```{eval-rst} diff --git a/docs/source/notebooks/plot_mnist_logistic_regression.ipynb b/docs/source/notebooks/plot_mnist_logistic_regression.ipynb index 7b24ff32..bfebfded 100644 --- a/docs/source/notebooks/plot_mnist_logistic_regression.ipynb +++ b/docs/source/notebooks/plot_mnist_logistic_regression.ipynb @@ -136,7 +136,7 @@ " start_epoch = 0\n", " all_metrics = defaultdict(list)\n", "else:\n", - " checkpoint = torch.load(checkpoint_path + \"/checkpoint.pth\")\n", + " checkpoint = torch.load(checkpoint_path + \"/checkpoint.pth\", weights_only=True)\n", " batch_ix = checkpoint[\"batch_ix\"]\n", " start_epoch = checkpoint[\"epoch\"] + 1\n", " all_metrics = checkpoint[\"all_metrics\"]\n", @@ -182,7 +182,7 @@ "del batch_ix, all_metrics, model, cmp, cooper_optimizer\n", "\n", "# Post-training analysis and plotting\n", - "all_metrics = torch.load(checkpoint_path + \"/checkpoint.pth\")[\"all_metrics\"]\n", + "all_metrics = torch.load(checkpoint_path + \"/checkpoint.pth\", weights_only=True)[\"all_metrics\"]\n", "\n", "fig, (ax0, ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=4, sharex=True, figsize=(18, 4))\n", "\n", diff --git a/docs/source/notebooks/plot_mnist_logistic_regression.md b/docs/source/notebooks/plot_mnist_logistic_regression.md index bc76a855..bc990f64 100644 --- a/docs/source/notebooks/plot_mnist_logistic_regression.md +++ b/docs/source/notebooks/plot_mnist_logistic_regression.md @@ -121,7 +121,7 @@ if not os.path.isfile(checkpoint_path + "/checkpoint.pth"): start_epoch = 0 all_metrics = defaultdict(list) else: - checkpoint = torch.load(checkpoint_path + "/checkpoint.pth") + checkpoint = torch.load(checkpoint_path + "/checkpoint.pth", weights_only=True) batch_ix = checkpoint["batch_ix"] start_epoch = checkpoint["epoch"] + 1 all_metrics = checkpoint["all_metrics"] @@ -167,7 +167,7 @@ for epoch_num in range(start_epoch, 7): del batch_ix, all_metrics, model, cmp, cooper_optimizer # Post-training 
analysis and plotting -all_metrics = torch.load(checkpoint_path + "/checkpoint.pth")["all_metrics"] +all_metrics = torch.load(checkpoint_path + "/checkpoint.pth", weights_only=True)["all_metrics"] fig, (ax0, ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=4, sharex=True, figsize=(18, 4)) diff --git a/docs/source/optim.md b/docs/source/optim.md index 511e140d..eb3101a3 100644 --- a/docs/source/optim.md +++ b/docs/source/optim.md @@ -95,7 +95,6 @@ directly via the dual learning rate scheduler. ```{eval-rst} .. automethod:: cooper.optim.partial_scheduler - ``` (extra-gradient-optimizers)= diff --git a/docs/source/problem.md b/docs/source/problem.md index 056426f5..82defb42 100644 --- a/docs/source/problem.md +++ b/docs/source/problem.md @@ -58,7 +58,6 @@ entail a compromise in the stability of the optimization process. ```{eval-rst} .. autoclass:: CMPState :members: as_tuple - ``` For details on the use of proxy-constraints and the `proxy_ineq_defect` and diff --git a/docs/source/references.bib b/docs/source/references.bib index a569fb00..aa85570d 100644 --- a/docs/source/references.bib +++ b/docs/source/references.bib @@ -1,4 +1,3 @@ - @book{nocedal2006NumericalOptimization, title = {Numerical Optimization}, author = {Nocedal, Jorge and Wright, Stephen J.}, @@ -72,7 +71,6 @@ @inproceedings{lin2020gradient year = {2020}, url = {https://proceedings.mlr.press/v119/lin20a.html} } - @inproceedings{sutskever2013initialization, title = {On the importance of initialization and momentum in deep learning}, author = {Sutskever, Ilya and Martens, James and Dahl, George and Hinton, Geoffrey}, diff --git a/pyproject.toml b/pyproject.toml index 739ede65..39e46931 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [build-system] build-backend = "hatchling.build" requires = [ - "hatchling>=1.1.0", "hatch-vcs>=0.4.0", + "hatchling>=1.1.0", ] [project] @@ -35,7 +35,6 @@ classifiers = [ ] dynamic = ["version"] dependencies = [ - "numpy>=1.22.0,<2.0.0", # PyTorch 2.2.2 and 
older don't support NumPy 2.0. "torch>=1.13.1", "typing-extensions>=4.8.0", ] @@ -43,43 +42,38 @@ dependencies = [ [project.optional-dependencies] dev = [ "build>=1.2.1", - "cvxpy~=1.5.1", - "mypy~=1.7.1", - "pre-commit>=3.6.0", - "pytest~=8.2.2", - "pytest-cov==4.1.0", - "ruff~=0.5.6", - "tox==4.11.4", - "twine==4.0.2", + "coverage>=7.6.1", + "cvxpy>=1.5.2", + "jupytext==1.16.3", + "mypy>=1.11.1", + "numpy>=1.22.0,<2.0.0", # PyTorch 2.2.2 and older don't support NumPy 2.0. + "pre-commit>=3.7.1", + "pytest>=8.3.2", + "ruff==0.5.7", + "twine>=5.1.1", ] docs = [ - "ipykernel>=6.5.0,<7.0.0", - "ipywidgets>=7.6.0,<8.0.0", - "jupytext>=1.16.3", - "matplotlib>=3.5.0,<4.0.0", + "matplotlib>=3.8.4,<4.0.0", "myst-nb>=1.1.1", - "sphinx>=4.3.1", - "sphinx-autobuild>=2021.3.14", - "sphinx-autodoc-typehints>=1.12.0", - "sphinx-copybutton>=0.4.0", - "sphinx-rtd-theme>=1.0.0", - "sphinxcontrib-bibtex>=2.4.1", - "sphinxcontrib-contentui>=0.2.5", - "sphinxcontrib-katex>=0.8.6", + "numpy>=1.22.0,<2.0.0", # PyTorch 2.2.2 and older don't support NumPy 2.0. + "sphinx>=7.4.7", + "sphinx-autobuild>=2024.4.16", + "sphinx-autodoc-typehints>=2.2.3", + "sphinx-copybutton>=0.5.2", + "sphinx-rtd-theme>=2.0.0", + "sphinxcontrib-bibtex>=2.6.2", "torchvision>=0.13.0,<1.0.0", ] -examples = [ - "ipykernel>=6.5.0,<7.0.0", - "ipywidgets>=7.6.0,<8.0.0", - "matplotlib>=3.5.0,<4.0.0", - "numpy==1.22.0", +notebooks = [ + "matplotlib>=3.8.4,<4.0.0", + "numpy>=1.22.0,<2.0.0", # PyTorch 2.2.2 and older don't support NumPy 2.0. "torchvision>=0.13.0,<1.0.0", ] tests = [ - "cvxpy~=1.5.1", - "pytest~=8.2.2", - "pytest-cov==4.1.0", - "tox==4.11.4", + "coverage>=7.6.1", + "cvxpy>=1.5.2", + "numpy>=1.22.0,<2.0.0", # PyTorch 2.2.2 and older don't support NumPy 2.0. 
+ "pytest>=8.3.2", ] [project.urls] @@ -102,64 +96,65 @@ exclude = [ source = "vcs" [tool.mypy] -mypy_path = "cooper" +packages = ["cooper"] warn_unused_configs = true +[tool.coverage.run] +relative_files = true + [tool.ruff] line-length = 120 target-version = "py39" extend-include = ["*.ipynb"] [tool.ruff.lint.isort] -known-first-party = ["cooper", "tests"] +known-first-party = ["cooper"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.pycodestyle] +max-doc-length = 88 [tool.ruff.lint] preview = true select = ["ALL"] ignore = [ + "ANN401", # Any type annotation "B028", # Stacklevel in warnings - "E501", # Line length (handled by ruff-format) - "E731", # Lambda function - "W505", # Doc Line length + "COM812", # Fixed by ruff-format "D1", # TODO: Remove this line when we have docstrings for all functions "D205", # 1 blank line required between summary line and description in docstrings - "DOC", # Docstring missing exceptions/returns + "E501", # Line length (handled by ruff-format) + "E731", # Lambda function + "FURB140", # Use itertools.starmap instead of list comprehension "ISC001", # Fixed by ruff-format - "COM812", # Fixed by ruff-format - "FA", # Future type annotations - "CPY", # Copyright notice - "TRY003", # Long Exception message - "SLF", # Private (underscore) attribute access - "EM", # Exception message not in seperate msg variable + "NPY002", # numpy.random.Generator is preferred over numpy.random.seed "PLR09", # Too many arguments "PLR2004", # Use of value instead of constant variable "PLR6104", # Forces in-place operations, for example, x += 1 instead of x = x + 1 "PLR6301", # Self not used in method "PLW2901", # For loop variable is overwritten + "RET504", # Unnecessary assignment before return + "S101", # Use of assert + "TRY003", # Long Exception message + "W505", # Doc Line length + "CPY", # Copyright notice + "DOC", # Docstring missing exceptions/returns + "EM", # Exception message not in seperate msg variable + "FA", # 
Future type annotations "FBT", # Boolean trap "FIX", # Fixmes - "TD", # TODOs - "ANN401", # Any type annotation "PTH", # Use Pathlib instead of os.path - "FURB140", # Use itertools.starmap instead of list comprehension - "NPY002", # numpy.random.Generator is preferred over numpy.random.seed - "RET504", # Unnecessary assignment before return - "S101", # Use of assert + "SLF", # Private (underscore) attribute access + "TD", # TODOs ] [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] +"testing/*" = ["ANN", "N801", "N802", "N803", "N806"] "tests/*" = ["ANN", "C901", "N801", "N802", "N803", "N806"] "docs/*" = ["ANN"] "docs/source/conf.py" = ["A001", "ERA001", "INP001"] "docs/source/notebooks/*" = ["N801", "N802", "N803", "N806"] "src/cooper/optim/torch_optimizers/nupi_optimizer.py" = ["C901", "N801", "N802", "N803", "N806"] - -[tool.ruff.lint.pydocstyle] -convention = "google" - -[tool.ruff.lint.pycodestyle] -max-doc-length = 88 - -[tool.coverage.run] -relative_files = true diff --git a/requirements.txt b/requirements.txt index 7a155d77..9fb4985c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ ---editable '.[dev, docs, tests, examples]' +--editable '.[dev, docs, tests, notebooks]' diff --git a/src/cooper/cmp.py b/src/cooper/cmp.py index 68268652..7df2ff8c 100644 --- a/src/cooper/cmp.py +++ b/src/cooper/cmp.py @@ -67,22 +67,19 @@ def _compute_primal_or_dual_lagrangian(self, primal_or_dual: Literal["primal", " check_contributes_fn = lambda cs: getattr(cs, f"contributes_to_{primal_or_dual}_update") contributing_constraints = {c: cs for c, cs in self.observed_constraints.items() if check_contributes_fn(cs)} - if len(contributing_constraints) == 0: - if self.loss is None: - return LagrangianStore() + if not contributing_constraints: # No observed constraints contribute to the Lagrangian. 
- lagrangian = self.loss.clone() if primal_or_dual == "primal" else None + lagrangian = self.loss.clone() if primal_or_dual == "primal" and self.loss is not None else None return LagrangianStore(lagrangian=lagrangian) lagrangian = self.loss.clone() if primal_or_dual == "primal" and self.loss is not None else 0.0 - multiplier_values = {} penalty_coefficient_values = {} + for constraint, constraint_state in contributing_constraints.items(): contribution_store = constraint.compute_contribution_to_lagrangian(constraint_state, primal_or_dual) if contribution_store is not None: lagrangian = lagrangian + contribution_store.lagrangian_contribution - multiplier_values[constraint] = contribution_store.multiplier_value if contribution_store.penalty_coefficient_value is not None: penalty_coefficient_values[constraint] = contribution_store.penalty_coefficient_value diff --git a/testing/__init__.py b/testing/__init__.py new file mode 100644 index 00000000..3f91c369 --- /dev/null +++ b/testing/__init__.py @@ -0,0 +1,8 @@ +from .cooper_helpers import ( + AlternationType, + SquaredNormLinearCMP, + build_cooper_optimizer, + build_dual_optimizers, + build_primal_optimizers, +) +from .utils import frozen_rand_generator, validate_state_dicts diff --git a/tests/helpers/cooper_test_utils.py b/testing/cooper_helpers.py similarity index 100% rename from tests/helpers/cooper_test_utils.py rename to testing/cooper_helpers.py diff --git a/tests/helpers/testing_utils.py b/testing/utils.py similarity index 100% rename from tests/helpers/testing_utils.py rename to testing/utils.py diff --git a/tests/constraints/test_constraint_state.py b/tests/constraints/test_constraint_state.py index 21c26ea4..fe224ba0 100644 --- a/tests/constraints/test_constraint_state.py +++ b/tests/constraints/test_constraint_state.py @@ -4,7 +4,7 @@ import torch import cooper -from tests.helpers import testing_utils +import testing @pytest.fixture(params=[1, 10]) @@ -14,7 +14,7 @@ def num_constraints(request): 
@pytest.fixture def violation(num_constraints): - violation = torch.randn(num_constraints, generator=testing_utils.frozen_rand_generator(0)) + violation = torch.randn(num_constraints, generator=testing.frozen_rand_generator(0)) if num_constraints == 1: violation.squeeze_() return violation @@ -22,7 +22,7 @@ def violation(num_constraints): @pytest.fixture def strict_violation(num_constraints): - strict_violation = torch.randn(num_constraints, generator=testing_utils.frozen_rand_generator(1)) + strict_violation = torch.randn(num_constraints, generator=testing.frozen_rand_generator(1)) if num_constraints == 1: strict_violation.squeeze_() return strict_violation @@ -30,12 +30,12 @@ def strict_violation(num_constraints): @pytest.fixture def constraint_features(num_constraints): - return torch.randperm(num_constraints, generator=testing_utils.frozen_rand_generator(2)) + return torch.randperm(num_constraints, generator=testing.frozen_rand_generator(2)) @pytest.fixture def strict_constraint_features(num_constraints): - return torch.randperm(num_constraints, generator=testing_utils.frozen_rand_generator(3)) + return torch.randperm(num_constraints, generator=testing.frozen_rand_generator(3)) @pytest.fixture(params=[True, False]) diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/multipliers/conftest.py b/tests/multipliers/conftest.py index 69f7252b..62032d74 100644 --- a/tests/multipliers/conftest.py +++ b/tests/multipliers/conftest.py @@ -2,7 +2,7 @@ import torch import cooper -from tests.helpers import testing_utils +import testing @pytest.fixture @@ -27,7 +27,7 @@ def multiplier_class(request): @pytest.fixture def init_multiplier_tensor(constraint_type, num_constraints, random_seed): - generator = testing_utils.frozen_rand_generator(random_seed) + generator = testing.frozen_rand_generator(random_seed) raw_init = torch.randn(num_constraints, generator=generator) if constraint_type == 
cooper.ConstraintType.INEQUALITY: return raw_init.relu() diff --git a/tests/multipliers/test_explicit_multipliers.py b/tests/multipliers/test_explicit_multipliers.py index abe2b192..7819d672 100644 --- a/tests/multipliers/test_explicit_multipliers.py +++ b/tests/multipliers/test_explicit_multipliers.py @@ -5,7 +5,7 @@ import torch import cooper -from tests.helpers import testing_utils +import testing def evaluate_multiplier(multiplier, all_indices): @@ -126,7 +126,7 @@ def test_ineq_post_step_(constraint_type, multiplier_class, init_multiplier_tens def check_save_load_state_dict(multiplier, explicit_multiplier_class, num_constraints, random_seed): - generator = testing_utils.frozen_rand_generator(random_seed) + generator = testing.frozen_rand_generator(random_seed) multiplier_init = torch.randn(num_constraints, generator=generator) new_multiplier = explicit_multiplier_class(init=multiplier_init) @@ -134,7 +134,7 @@ def check_save_load_state_dict(multiplier, explicit_multiplier_class, num_constr # Save to file to force reading from file so we can ensure correct loading with tempfile.TemporaryDirectory() as tmpdirname: torch.save(multiplier.state_dict(), os.path.join(tmpdirname, "multiplier.pt")) - state_dict = torch.load(os.path.join(tmpdirname, "multiplier.pt")) + state_dict = torch.load(os.path.join(tmpdirname, "multiplier.pt"), weights_only=True) new_multiplier.load_state_dict(state_dict) diff --git a/tests/multipliers/test_penalty_coefficients.py b/tests/multipliers/test_penalty_coefficients.py index 20d17057..e11c86c2 100644 --- a/tests/multipliers/test_penalty_coefficients.py +++ b/tests/multipliers/test_penalty_coefficients.py @@ -1,8 +1,8 @@ import pytest import torch +import testing from cooper import multipliers -from tests.helpers import testing_utils @pytest.fixture(params=[1, 100]) @@ -12,7 +12,7 @@ def num_constraints(request): @pytest.fixture def init_tensor(num_constraints): - generator = testing_utils.frozen_rand_generator() + generator = 
testing.frozen_rand_generator() return torch.rand(num_constraints, generator=generator) diff --git a/tests/pipeline/conftest.py b/tests/pipeline/conftest.py index 9738dfc4..0a61a268 100644 --- a/tests/pipeline/conftest.py +++ b/tests/pipeline/conftest.py @@ -4,8 +4,7 @@ import torch import cooper -from cooper.multipliers import MultiplicativePenaltyCoefficientUpdater -from tests.helpers import cooper_test_utils +import testing PRIMAL_LR = 3e-2 DUAL_LR = 2e-1 @@ -75,13 +74,13 @@ def extrapolation(request, formulation_type): @pytest.fixture( params=[ - cooper_test_utils.AlternationType.FALSE, - cooper_test_utils.AlternationType.PRIMAL_DUAL, - cooper_test_utils.AlternationType.DUAL_PRIMAL, + testing.AlternationType.FALSE, + testing.AlternationType.PRIMAL_DUAL, + testing.AlternationType.DUAL_PRIMAL, ] ) def alternation_type(request, extrapolation, formulation_type): - is_alternation = request.param != cooper_test_utils.AlternationType.FALSE + is_alternation = request.param != testing.AlternationType.FALSE if extrapolation and is_alternation: pytest.skip("Extrapolation is only supported for simultaneous updates.") @@ -92,7 +91,7 @@ def alternation_type(request, extrapolation, formulation_type): @pytest.fixture def unconstrained_cmp(device, num_variables): - cmp = cooper_test_utils.SquaredNormLinearCMP(num_variables=num_variables, device=device) + cmp = testing.SquaredNormLinearCMP(num_variables=num_variables, device=device) return cmp @@ -151,18 +150,16 @@ def cmp( cmp_kwargs[f"{prefix}_formulation_type"] = formulation_type cmp_kwargs[f"{prefix}_penalty_coefficient_type"] = penalty_coefficient_type - cmp = cooper_test_utils.SquaredNormLinearCMP(**cmp_kwargs) + cmp = testing.SquaredNormLinearCMP(**cmp_kwargs) return cmp @pytest.fixture def cooper_optimizer_no_constraint(unconstrained_cmp, params): - primal_optimizers = cooper_test_utils.build_primal_optimizers( + primal_optimizers = testing.build_primal_optimizers( params, primal_optimizer_kwargs=[{"lr": PRIMAL_LR} for _ 
in range(len(params))] ) - cooper_optimizer = cooper_test_utils.build_cooper_optimizer( - cmp=unconstrained_cmp, primal_optimizers=primal_optimizers - ) + cooper_optimizer = testing.build_cooper_optimizer(cmp=unconstrained_cmp, primal_optimizers=primal_optimizers) return cooper_optimizer @@ -173,11 +170,11 @@ def cooper_optimizer( primal_optimizer_kwargs = [{"lr": PRIMAL_LR}] if use_multiple_primal_optimizers: primal_optimizer_kwargs.append({"lr": 10 * PRIMAL_LR, "betas": (0.0, 0.0), "eps": 10.0}) - primal_optimizers = cooper_test_utils.build_primal_optimizers( + primal_optimizers = testing.build_primal_optimizers( params, extrapolation, primal_optimizer_kwargs=primal_optimizer_kwargs ) - cooper_optimizer = cooper_test_utils.build_cooper_optimizer( + cooper_optimizer = testing.build_cooper_optimizer( cmp=cmp, primal_optimizers=primal_optimizers, extrapolation=extrapolation, @@ -193,7 +190,7 @@ def cooper_optimizer( def penalty_updater(formulation_type): if formulation_type != cooper.AugmentedLagrangianFormulation: return None - penalty_updater = MultiplicativePenaltyCoefficientUpdater( + penalty_updater = cooper.multipliers.MultiplicativePenaltyCoefficientUpdater( growth_factor=PENALTY_GROWTH_FACTOR, violation_tolerance=PENALTY_VIOLATION_TOLERANCE ) return penalty_updater diff --git a/tests/pipeline/test_checkpoint.py b/tests/pipeline/test_checkpoint.py index 3058d9b7..8ef98141 100644 --- a/tests/pipeline/test_checkpoint.py +++ b/tests/pipeline/test_checkpoint.py @@ -7,7 +7,7 @@ import torch import cooper -from tests.helpers import cooper_test_utils, testing_utils +import testing DUAL_LR = 1e-2 @@ -33,7 +33,7 @@ def construct_cmp(multiplier_type, num_constraints, num_variables, device): A = torch.randn(num_constraints, num_variables, device=device, generator=generator) b = torch.randn(num_constraints, device=device, generator=generator) - return cooper_test_utils.SquaredNormLinearCMP( + return testing.SquaredNormLinearCMP( num_variables=num_variables, 
has_ineq_constraint=True, ineq_multiplier_type=multiplier_type, @@ -53,8 +53,8 @@ def test_checkpoint(multiplier_type, use_multiple_primal_optimizers, num_constra cmp = construct_cmp(multiplier_type, num_constraints, num_variables, device) - primal_optimizers = cooper_test_utils.build_primal_optimizers(list(model.parameters())) - cooper_optimizer = cooper_test_utils.build_cooper_optimizer( + primal_optimizers = testing.build_primal_optimizers(list(model.parameters())) + cooper_optimizer = testing.build_cooper_optimizer( cmp=cmp, primal_optimizers=primal_optimizers, dual_optimizer_kwargs={"lr": DUAL_LR} ) cooper_optimizer_class = type(cooper_optimizer) @@ -77,9 +77,9 @@ def test_checkpoint(multiplier_type, use_multiple_primal_optimizers, num_constra del cooper_optimizer_state_dict_100 del cmp_state_dict_100 - model_state_dict_100 = torch.load(os.path.join(tmpdirname, "model.pt")) - cooper_optimizer_state_dict_100 = torch.load(os.path.join(tmpdirname, "cooper_optimizer.pt")) - cmp_state_dict_100 = torch.load(os.path.join(tmpdirname, "cmp.pt")) + model_state_dict_100 = torch.load(os.path.join(tmpdirname, "model.pt"), weights_only=True) + cooper_optimizer_state_dict_100 = torch.load(os.path.join(tmpdirname, "cooper_optimizer.pt"), weights_only=True) + cmp_state_dict_100 = torch.load(os.path.join(tmpdirname, "cmp.pt"), weights_only=True) # ------------ Train for *another* 100 steps ------------ for _ in range(100): @@ -100,10 +100,10 @@ def test_checkpoint(multiplier_type, use_multiple_primal_optimizers, num_constra loaded_model.load_state_dict(model_state_dict_100) loaded_model.to(device=device) - loaded_primal_optimizers = cooper_test_utils.build_primal_optimizers(list(loaded_model.parameters())) + loaded_primal_optimizers = testing.build_primal_optimizers(list(loaded_model.parameters())) loaded_dual_optimizers = None if any(new_cmp.constraints()): - loaded_dual_optimizers = cooper_test_utils.build_dual_optimizers( + loaded_dual_optimizers = 
testing.build_dual_optimizers( dual_parameters=new_cmp.dual_parameters(), dual_optimizer_kwargs={"lr": DUAL_LR} ) @@ -120,6 +120,6 @@ def test_checkpoint(multiplier_type, use_multiple_primal_optimizers, num_constra # ------------ Compare checkpoint and loaded-then-trained objects ------------ # Compare 0-200 state_dicts versus the 0-100;100-200 state_dicts - assert testing_utils.validate_state_dicts(loaded_model.state_dict(), model_state_dict_200) - assert testing_utils.validate_state_dicts(loaded_cooper_optimizer.state_dict(), cooper_optimizer_state_dict_200) - assert testing_utils.validate_state_dicts(new_cmp.state_dict(), cmp_state_dict_200) + assert testing.validate_state_dicts(loaded_model.state_dict(), model_state_dict_200) + assert testing.validate_state_dicts(loaded_cooper_optimizer.state_dict(), cooper_optimizer_state_dict_200) + assert testing.validate_state_dicts(new_cmp.state_dict(), cmp_state_dict_200) diff --git a/tests/pipeline/test_convergence.py b/tests/pipeline/test_convergence.py index 8cdc0650..d19e2526 100644 --- a/tests/pipeline/test_convergence.py +++ b/tests/pipeline/test_convergence.py @@ -1,6 +1,6 @@ import torch -from tests.helpers import cooper_test_utils +import testing def test_convergence_no_constraint(unconstrained_cmp, params, cooper_optimizer_no_constraint): @@ -21,7 +21,7 @@ def test_convergence_with_constraint( for _ in range(2000): roll_kwargs = {"compute_cmp_state_kwargs": {"x": torch.cat(params)}} - if alternation_type == cooper_test_utils.AlternationType.PRIMAL_DUAL: + if alternation_type == testing.AlternationType.PRIMAL_DUAL: roll_kwargs["compute_violations_kwargs"] = {"x": torch.cat(params)} roll_out = cooper_optimizer.roll(**roll_kwargs) diff --git a/tests/pipeline/test_manual.py b/tests/pipeline/test_manual.py index a1c115a5..3c42fa0d 100644 --- a/tests/pipeline/test_manual.py +++ b/tests/pipeline/test_manual.py @@ -5,8 +5,7 @@ import torch import cooper -from cooper.multipliers import 
MultiplicativePenaltyCoefficientUpdater -from tests.helpers import cooper_test_utils +import testing PRIMAL_LR = 3e-2 DUAL_LR = 2e-1 @@ -51,14 +50,14 @@ def test_manual_step(self, extrapolation, alternation_type): The manual implementation assumes Stochastic Gradient Descent (SGD) is used for both the primal and dual optimizers. """ - if alternation_type == cooper_test_utils.AlternationType.PRIMAL_DUAL and self.is_indexed_multiplier: + if alternation_type == testing.AlternationType.PRIMAL_DUAL and self.is_indexed_multiplier: pytest.skip("Cannot test IndexedMultiplier with PRIMAL_DUAL alternation.") x = torch.nn.Parameter(torch.ones(self.num_variables, device=self.device)) optimizer_class = cooper.optim.ExtraSGD if extrapolation else torch.optim.SGD primal_optimizers = optimizer_class([x], lr=PRIMAL_LR) - cooper_optimizer = cooper_test_utils.build_cooper_optimizer( + cooper_optimizer = testing.build_cooper_optimizer( cmp=self.cmp, primal_optimizers=primal_optimizers, extrapolation=extrapolation, @@ -70,12 +69,12 @@ def test_manual_step(self, extrapolation, alternation_type): penalty_updater = None if self.is_augmented_lagrangian: - penalty_updater = MultiplicativePenaltyCoefficientUpdater( + penalty_updater = cooper.multipliers.MultiplicativePenaltyCoefficientUpdater( growth_factor=PENALTY_GROWTH_FACTOR, violation_tolerance=PENALTY_VIOLATION_TOLERANCE ) roll_kwargs = {"compute_cmp_state_kwargs": {"x": x}} - if alternation_type == cooper_test_utils.AlternationType.PRIMAL_DUAL: + if alternation_type == testing.AlternationType.PRIMAL_DUAL: roll_kwargs["compute_violations_kwargs"] = {"x": x} manual_x = torch.ones(self.num_variables, device=self.device) @@ -90,7 +89,7 @@ def test_manual_step(self, extrapolation, alternation_type): if self.is_augmented_lagrangian: penalty_updater.step(roll_out.cmp_state.observed_constraints) - if alternation_type == cooper_test_utils.AlternationType.PRIMAL_DUAL: + if alternation_type == testing.AlternationType.PRIMAL_DUAL: 
observed_multipliers = torch.cat(list(roll_out.dual_lagrangian_store.observed_multiplier_values())) else: observed_multipliers = torch.cat(list(roll_out.primal_lagrangian_store.observed_multiplier_values())) @@ -197,7 +196,7 @@ def _dual_lagrangian(self, x, multiplier, strict_features, penalty_coeff=None): return torch.sum(penalty_coeff[strict_features] * multiplier[strict_features] * violation) def _update_penalty_coefficients(self, x, x_prev, strict_features, alternation_type, penalty_coeff): - if alternation_type == cooper_test_utils.AlternationType.PRIMAL_DUAL: + if alternation_type == testing.AlternationType.PRIMAL_DUAL: strict_violation = self._violation(x, strict=True)[strict_features] else: strict_violation = self._violation(x_prev, strict=True)[strict_features] @@ -263,10 +262,10 @@ def _extragradient_roll(self, x, multiplier, features, strict_features): def manual_roll(self, x, multiplier, features, strict_features, alternation_type, penalty_coeff, extrapolation): if extrapolation: return self._extragradient_roll(x, multiplier, features, strict_features) - if alternation_type == cooper_test_utils.AlternationType.FALSE: + if alternation_type == testing.AlternationType.FALSE: return self._simultaneous_roll(x, multiplier, features, strict_features) - if alternation_type == cooper_test_utils.AlternationType.DUAL_PRIMAL: + if alternation_type == testing.AlternationType.DUAL_PRIMAL: return self._dual_primal_roll(x, multiplier, features, strict_features, penalty_coeff) - if alternation_type == cooper_test_utils.AlternationType.PRIMAL_DUAL: + if alternation_type == testing.AlternationType.PRIMAL_DUAL: return self._primal_dual_roll(x, multiplier, features, strict_features, penalty_coeff) raise ValueError(f"Unknown alternation type: {alternation_type}") diff --git a/tests/setup.cfg b/tests/setup.cfg deleted file mode 100644 index 1f826159..00000000 --- a/tests/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[tool:pytest] -norecursedirs=tests/helpers diff --git a/tox.ini 
b/tox.ini deleted file mode 100644 index 8c5ecef7..00000000 --- a/tox.ini +++ /dev/null @@ -1,40 +0,0 @@ -[tox] -minversion = 3.9.0 -envlist = python{3.9, 3.10}-torch{13}-{linux,macos,windows}, python{3.9, 3.10, 3.11}-torch{20, 21}-{linux,macos,windows}, lint, mypy -isolated_build = true - -[gh-actions] -python = - 3.9: python3.9 - 3.10: python3.10, lint, mypy - 3.11: python3.11 - -[gh-actions:env] -PLATFORM = - ubuntu-latest: linux - macos-latest: macos - windows-latest: windows - -[testenv] -setenv = - PYTHONPATH = {toxinidir} -extras = tests -whitelist_externals = pytest -deps = - torch13: torch == 1.13.1 - torch20: torch == 2.0.0 - torch21: torch == 2.1.1 -commands = - pytest --basetemp={envtmpdir} - -[testenv:lint] -basepython = python3.10 -extras = dev -commands = - flake8 cooper --count --exit-zero --statistics - black --check --diff . - isort cooper tutorials tests - -[testenv:mypy] -basepython = python3.10 -commands = mypy cooper