ci(wheel): build PT OPs (deepmodeling#3894)
Build PT OPs for our wheels (built against the CPU version of PT, but they also
work with the GPU version). Add PT to the build dependencies if it is not found.
Bump MPICH to the latest version. Update the documentation to note that PT
support for LAMMPS and i-PI is not included.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
  - Introduced PyTorch support across various backend functionalities, enhancing compatibility and capabilities alongside existing TensorFlow support.

- **Documentation**
  - Updated installation guide to include warnings about TensorFlow backend support and removed outdated notes about macOS arm64 package support.

- **Chores**
  - Updated dependency versions and build configurations to include new PyTorch settings and improved compatibility for macOS and Linux.
  - Adjusted build scripts to conditionally link libraries based on new PyTorch-related parameters.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Jinzhe Zeng <[email protected]>
njzjz authored and Mathieu Taillefumier committed Sep 18, 2024
1 parent 10b752f commit ad226aa
Showing 8 changed files with 148 additions and 26 deletions.
15 changes: 13 additions & 2 deletions backend/dp_backend.py
@@ -7,6 +7,9 @@

from scikit_build_core import build as _orig

from .find_pytorch import (
find_pytorch,
)
from .find_tensorflow import (
find_tensorflow,
)
@@ -40,10 +43,18 @@ def __dir__() -> List[str]:
def get_requires_for_build_wheel(
config_settings: dict,
) -> List[str]:
return _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1]
return (
_orig.get_requires_for_build_wheel(config_settings)
+ find_tensorflow()[1]
+ find_pytorch()[1]
)


def get_requires_for_build_editable(
config_settings: dict,
) -> List[str]:
return _orig.get_requires_for_build_editable(config_settings) + find_tensorflow()[1]
return (
_orig.get_requires_for_build_editable(config_settings)
+ find_tensorflow()[1]
+ find_pytorch()[1]
)
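
For illustration only, a minimal sketch of exercising the updated hook outside a real PEP 517 frontend. It assumes the deepmd-kit source tree is the working directory (so `backend` is importable), `scikit-build-core` is installed, and `torch` is absent from the build environment; the printed list is indicative, not exact.

```python
# Sketch only - not part of the diff. Calls the build hook directly, assuming the
# deepmd-kit source tree is on sys.path and scikit-build-core is installed.
import os

# Opt in to the PyTorch backend before the first (lru_cache'd) lookup runs.
os.environ["DP_ENABLE_PYTORCH"] = "1"

from backend.dp_backend import get_requires_for_build_wheel

# With torch not importable, the result contains scikit-build-core's own requirements,
# the TensorFlow requirement, and a PyTorch requirement such as "torch>=2a".
print(get_requires_for_build_wheel({}))
```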
8 changes: 7 additions & 1 deletion backend/dynamic_metadata.py
@@ -9,6 +9,9 @@
Optional,
)

from .find_pytorch import (
get_pt_requirement,
)
from .find_tensorflow import (
get_tf_requirement,
)
@@ -33,7 +36,9 @@ def dynamic_metadata(
settings: Optional[Dict[str, object]] = None,
):
assert field in ["optional-dependencies", "entry-points", "scripts"]
_, _, find_libpython_requires, extra_scripts, tf_version = get_argument_from_env()
_, _, find_libpython_requires, extra_scripts, tf_version, pt_version = (
get_argument_from_env()
)
with Path("pyproject.toml").open("rb") as f:
pyproject = tomllib.load(f)

@@ -51,4 +56,5 @@ def dynamic_metadata(
return {
**optional_dependencies,
**get_tf_requirement(tf_version),
**get_pt_requirement(pt_version),
}
75 changes: 72 additions & 3 deletions backend/find_pytorch.py
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import importlib
import os
import site
from functools import (
@@ -17,12 +18,19 @@
get_path,
)
from typing import (
List,
Optional,
Tuple,
Union,
)

from packaging.version import (
Version,
)


@lru_cache
def find_pytorch() -> Optional[str]:
def find_pytorch() -> Tuple[Optional[str], List[str]]:
"""Find PyTorch library.
Tries to find PyTorch in the order of:
@@ -39,9 +47,12 @@ def find_pytorch() -> Optional[str]:
-------
str, optional
PyTorch library path if found.
list of str
PyTorch requirement if not found. Empty if found.
"""
if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0":
return None
return None, []
requires = []
pt_spec = None

if (pt_spec is None or not pt_spec) and os.environ.get("PYTORCH_ROOT") is not None:
@@ -73,4 +84,62 @@ def find_pytorch() -> Optional[str]:
# IndexError if submodule_search_locations is an empty list
except (AttributeError, TypeError, IndexError):
pt_install_dir = None
return pt_install_dir
requires.extend(get_pt_requirement()["torch"])
return pt_install_dir, requires


@lru_cache
def get_pt_requirement(pt_version: str = "") -> dict:
"""Get PyTorch requirement when PT is not installed.
If pt_version is not given and the environment variable `PYTORCH_VERSION` is set, use it as the requirement.
Parameters
----------
pt_version : str, optional
PT version
Returns
-------
dict
PyTorch requirement.
"""
if pt_version is None:
return {"torch": []}
if pt_version == "":
pt_version = os.environ.get("PYTORCH_VERSION", "")

return {
"torch": [
# uv has different local version behaviors, i.e. `==2.3.1` cannot match `==2.3.1+cpu`
# https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#local-version-identifiers
# luckily, .* (prefix matching) defined in PEP 440 can match any local version
# https://peps.python.org/pep-0440/#version-matching
f"torch=={Version(pt_version).base_version}.*"
if pt_version != ""
else "torch>=2a",
],
}


@lru_cache
def get_pt_version(pt_path: Optional[Union[str, Path]]) -> str:
"""Get TF version from a TF Python library path.
Parameters
----------
pt_path : str or Path
PT Python library path
Returns
-------
str
version
"""
if pt_path is None or pt_path == "":
return ""
version_file = Path(pt_path) / "version.py"
spec = importlib.util.spec_from_file_location("torch.version", version_file)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
return module.__version__
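
As a quick check of the pinning rule described in the comments above (not part of the diff), `packaging` can be used directly to confirm that the `.*` prefix pin derived from a local-version torch build matches any local variant; the versions below are only examples.

```python
# Illustration of the pin get_pt_requirement would derive from a local-version build.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

pt_version = "2.3.1+cpu"  # e.g. the CPU wheel the extension was built against
pin = f"torch=={Version(pt_version).base_version}.*"
print(pin)  # torch==2.3.1.*

# PEP 440 prefix matching ignores local version labels,
# so both CPU and CUDA builds of 2.3.1 satisfy the pin.
assert Version("2.3.1+cpu") in SpecifierSet("==2.3.1.*")
assert Version("2.3.1+cu121") in SpecifierSet("==2.3.1.*")
```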
2 changes: 1 addition & 1 deletion backend/find_tensorflow.py
@@ -204,7 +204,7 @@ def get_tf_requirement(tf_version: str = "") -> dict:


@lru_cache
def get_tf_version(tf_path: Union[str, Path]) -> str:
def get_tf_version(tf_path: Optional[Union[str, Path]]) -> str:
"""Get TF version from a TF Python library path.
Parameters
14 changes: 9 additions & 5 deletions backend/read_env.py
@@ -15,6 +15,7 @@

from .find_pytorch import (
find_pytorch,
get_pt_version,
)
from .find_tensorflow import (
find_tensorflow,
@@ -23,7 +24,7 @@


@lru_cache
def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
def get_argument_from_env() -> Tuple[str, list, list, dict, str, str]:
"""Get the arguments from environment variables.
The environment variables are assumed to be not changed during the build.
@@ -40,6 +41,8 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
The extra scripts to be installed.
str
The TensorFlow version.
str
The PyTorch version.
"""
cmake_args = []
extra_scripts = {}
@@ -103,9 +106,8 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
tf_version = None

if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1":
pt_install_dir = find_pytorch()
if pt_install_dir is None:
raise RuntimeError("Cannot find installed PyTorch.")
pt_install_dir, _ = find_pytorch()
pt_version = get_pt_version(pt_install_dir)
cmake_args.extend(
[
"-DENABLE_PYTORCH=ON",
Expand All @@ -114,6 +116,7 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
)
else:
cmake_args.append("-DENABLE_PYTORCH=OFF")
pt_version = None

cmake_args = [
"-DBUILD_PY_IF:BOOL=TRUE",
@@ -125,11 +128,12 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
find_libpython_requires,
extra_scripts,
tf_version,
pt_version,
)


def set_scikit_build_env():
"""Set scikit-build environment variables before executing scikit-build."""
cmake_minimum_required_version, cmake_args, _, _, _ = get_argument_from_env()
cmake_minimum_required_version, cmake_args, _, _, _, _ = get_argument_from_env()
os.environ["SKBUILD_CMAKE_MINIMUM_VERSION"] = cmake_minimum_required_version
os.environ["SKBUILD_CMAKE_ARGS"] = ";".join(cmake_args)
6 changes: 5 additions & 1 deletion doc/install/easy-install.md
@@ -132,7 +132,11 @@ pip install deepmd-kit[cpu]
pip install deepmd-kit[gpu,cu12,torch,lmp,ipi]
```

MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.)
MPICH is required for parallel running.

:::{Warning}
When installing from pip, only the TensorFlow {{ tensorflow_icon }} backend is supported with LAMMPS and i-PI.
:::

It is suggested to install the package into an isolated environment.
The supported platform includes Linux x86-64 and aarch64 with GNU C Library 2.28 or above, macOS x86-64 and arm64, and Windows x86-64.
49 changes: 37 additions & 12 deletions pyproject.toml
@@ -126,9 +126,6 @@ cu12 = [
"nvidia-cudnn-cu12<9",
"nvidia-cuda-nvcc-cu12",
]
torch = [
"torch>=2a",
]

[tool.deepmd_build_backend.scripts]
dp = "deepmd.main:main"
@@ -198,56 +195,84 @@ replacement = '\1="https://github.com/deepmodeling/deepmd-kit/raw/master/\g<2>"'
[tool.cibuildwheel]
test-command = [
"python -m deepmd -h",
"""python -c "import deepmd.tf;import deepmd.pt" """,
"dp -h",
"dp_ipi",
"pytest {project}/source/tests/tf/test_lammps.py"
]
test-extras = ["cpu", "test", "lmp", "ipi"]
build = ["cp310-*"]
test-extras = ["cpu", "test", "lmp", "ipi", "torch"]
build = ["cp311-*"]
skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"]
# TODO: uncomment to use the latest image when CUDA 11 is deprecated
# manylinux-x86_64-image = "manylinux_2_28"
manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81"
manylinux-aarch64-image = "manylinux_2_28"

[tool.cibuildwheel.macos]
environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1" }
before-all = [
"""brew install mpich""",
'''pip install -i https://pypi.anaconda.org/mpi4py/simple mpich''',
]
repair-wheel-command = """delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} --ignore-missing-dependencies"""

[tool.cibuildwheel.macos.environment]
PIP_PREFER_BINARY = "1"
DP_LAMMPS_VERSION = "stable_2Aug2023_update3"
DP_ENABLE_IPI = "1"
DP_ENABLE_PYTORCH = "1"
# for an unclear reason, OpenMP is accidentally found when PyTorch is enabled
CMAKE_ARGS = "-DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=1"

[[tool.cibuildwheel.overrides]]
# error: 'value' is unavailable: introduced in macOS 10.13
select = "*-macosx_x86_64"
inherit.environment = "append"
environment.MACOSX_DEPLOYMENT_TARGET = "10.13"

[tool.cibuildwheel.linux]
repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}"
repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 --exclude libc10.so --exclude libtorch.so --exclude libtorch_cpu.so -w {dest_dir} {wheel}"
environment-pass = [
"CIBW_BUILD",
"DP_VARIANT",
"CUDA_VERSION",
"DP_PKG_NAME",
"SETUPTOOLS_SCM_PRETEND_VERSION",
]
environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
before-all = [
"""if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""",
# https://almalinux.org/blog/2023-12-20-almalinux-8-key-update/
"""rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux""",
"""{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""",
"yum install -y mpich-devel",
'''/opt/python/cp311-cp311/bin/python -m pip install -i https://pypi.anaconda.org/mpi4py/simple mpich''',
# uv is not available in the old manylinux image
"""{ if [ "$(uname -m)" = "x86_64" ] ; then pipx install uv; fi }""",
]
before-build = [
# old build doesn't support uv
"""{ if [ "$(uname -m)" = "x86_64" ] ; then uv pip install --system -U build; fi }""",
]
[tool.cibuildwheel.linux.environment]
PIP_PREFER_BINARY = "1"
DP_LAMMPS_VERSION = "stable_2Aug2023_update3"
DP_ENABLE_IPI = "1"
DP_ENABLE_PYTORCH = "1"
MPI_HOME = "/usr/lib64/mpich"
PATH = "/usr/lib64/mpich/bin:$PATH"
# use CPU version of torch for building, which should also work for GPU
# note: uv has different behavior from pip on extra index url
# https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#packages-that-exist-on-multiple-indexes
UV_EXTRA_INDEX_URL = "https://download.pytorch.org/whl/cpu"
# trick to find the correct version of mpich
CMAKE_PREFIX_PATH="/opt/python/cp311-cp311/"

[tool.cibuildwheel.windows]
environment = { PIP_PREFER_BINARY="1" }
test-extras = ["cpu"]
test-extras = ["cpu", "torch"]
test-command = [
"python -m deepmd -h",
"dp -h",
]
[tool.cibuildwheel.windows.environment]
PIP_PREFER_BINARY = "1"
DP_ENABLE_PYTORCH = "1"

# One can run `tox` or `tox -e gpu`
# to run pytest in an isolated environment
5 changes: 4 additions & 1 deletion source/api_cc/CMakeLists.txt
@@ -16,7 +16,10 @@ if(ENABLE_TENSORFLOW)
TensorFlow::tensorflow_framework)
target_compile_definitions(${libname} PRIVATE BUILD_TENSORFLOW)
endif()
if(ENABLE_PYTORCH AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}")
if(ENABLE_PYTORCH
AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}"
# LAMMPS and i-PI in the Python package are not ready - needs more work
AND NOT BUILD_PY_IF)
target_link_libraries(${libname} PRIVATE "${TORCH_LIBRARIES}")
target_compile_definitions(${libname} PRIVATE BUILD_PYTORCH)
endif()