Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci(wheel): build PT OPs #3894

Merged
merged 36 commits into from
Jun 23, 2024
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
3c924b2
automatically set pytorch requires
njzjz Jun 20, 2024
63c1bd7
remove the error message when PT is not preinstalled
njzjz Jun 20, 2024
59f36af
pass pt version
njzjz Jun 21, 2024
84cf0f5
enable pytorch in cibuildwheel
njzjz Jun 21, 2024
8286b9e
fix argument
njzjz Jun 21, 2024
4c28b75
fix build on macos/windows
njzjz Jun 21, 2024
e270adb
use CPU version for build
njzjz Jun 21, 2024
4e766d3
fix macosx_x86_64 error
njzjz Jun 21, 2024
14a87a0
exclude libc10 and libtorch
njzjz Jun 21, 2024
76225f0
fix openmp issue on macos-arm64
njzjz Jun 21, 2024
9675b4e
exclude libtorch_cpu.so
njzjz Jun 21, 2024
d1f47fd
fix CMAKE_ARGS
njzjz Jun 21, 2024
13404aa
inherit env
njzjz Jun 21, 2024
2e91ce8
mpich
njzjz Jun 21, 2024
ced2562
set pt dir to LD_LIBRARY_PATH
njzjz Jun 21, 2024
30b5377
fix typo
njzjz Jun 21, 2024
f9d4e52
prevent OP_CXX_ABI not defined
njzjz Jun 22, 2024
1ab3f92
Revert "prevent OP_CXX_ABI not defined"
njzjz Jun 22, 2024
55f04fb
Revert "fix typo"
njzjz Jun 22, 2024
a2e4070
Revert "set pt dir to LD_LIBRARY_PATH"
njzjz Jun 22, 2024
569a958
Revert "fix openmp issue on macos-arm64"
njzjz Jun 22, 2024
6ea694d
Revert "fix build on macos/windows"
njzjz Jun 22, 2024
ed4bcda
revert macos and windows
njzjz Jun 22, 2024
96affe0
base version
njzjz Jun 22, 2024
da61090
only parse version when version is not ""
njzjz Jun 22, 2024
a76082c
Merge remote-tracking branch 'origin/devel' into cibuildwheel-build-pt
njzjz Jun 22, 2024
b5f336d
Revert "revert macos and windows"
njzjz Jun 22, 2024
6306a41
skip api_cc
njzjz Jun 22, 2024
57fdbd4
test import
njzjz Jun 22, 2024
b42dd24
update build
njzjz Jun 22, 2024
e0c6059
update documentation
njzjz Jun 22, 2024
6a88f38
update documentation
njzjz Jun 22, 2024
6a9c294
Revert "Revert "fix openmp issue on macos-arm64""
njzjz Jun 22, 2024
4c38fad
version prefix matching
njzjz Jun 22, 2024
f9e2373
fix the typing of pt_path and tf_path
njzjz Jun 22, 2024
896e995
add torch to windows test-extras
njzjz Jun 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions backend/dp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

from scikit_build_core import build as _orig

from .find_pytorch import (
find_pytorch,
)
from .find_tensorflow import (
find_tensorflow,
)
Expand Down Expand Up @@ -40,10 +43,18 @@ def __dir__() -> List[str]:
def get_requires_for_build_wheel(
config_settings: dict,
) -> List[str]:
return _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1]
return (
_orig.get_requires_for_build_wheel(config_settings)
+ find_tensorflow()[1]
+ find_pytorch()[1]
)


def get_requires_for_build_editable(
config_settings: dict,
) -> List[str]:
return _orig.get_requires_for_build_editable(config_settings) + find_tensorflow()[1]
return (
_orig.get_requires_for_build_editable(config_settings)
+ find_tensorflow()[1]
+ find_pytorch()[1]
)
8 changes: 7 additions & 1 deletion backend/dynamic_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
Optional,
)

from .find_pytorch import (
get_pt_requirement,
)
from .find_tensorflow import (
get_tf_requirement,
)
Expand All @@ -33,7 +36,9 @@ def dynamic_metadata(
settings: Optional[Dict[str, object]] = None,
):
assert field in ["optional-dependencies", "entry-points", "scripts"]
_, _, find_libpython_requires, extra_scripts, tf_version = get_argument_from_env()
_, _, find_libpython_requires, extra_scripts, tf_version, pt_version = (
get_argument_from_env()
)
with Path("pyproject.toml").open("rb") as f:
pyproject = tomllib.load(f)

Expand All @@ -51,4 +56,5 @@ def dynamic_metadata(
return {
**optional_dependencies,
**get_tf_requirement(tf_version),
**get_pt_requirement(pt_version),
}
75 changes: 72 additions & 3 deletions backend/find_pytorch.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import importlib
import os
import site
from functools import (
Expand All @@ -17,12 +18,19 @@
get_path,
)
from typing import (
List,
Optional,
Tuple,
Union,
)

from packaging.version import (
Version,
)


@lru_cache
def find_pytorch() -> Optional[str]:
def find_pytorch() -> Tuple[Optional[str], List[str]]:
"""Find PyTorch library.

Tries to find PyTorch in the order of:
Expand All @@ -39,9 +47,12 @@ def find_pytorch() -> Optional[str]:
-------
str, optional
PyTorch library path if found.
list of str
PyTorch requirement if not found. Empty if found.
"""
if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0":
return None
return None, []
requires = []
pt_spec = None

if (pt_spec is None or not pt_spec) and os.environ.get("PYTORCH_ROOT") is not None:
Expand Down Expand Up @@ -73,4 +84,62 @@ def find_pytorch() -> Optional[str]:
# IndexError if submodule_search_locations is an empty list
except (AttributeError, TypeError, IndexError):
pt_install_dir = None
return pt_install_dir
requires.extend(get_pt_requirement()["torch"])
return pt_install_dir, requires


@lru_cache
def get_pt_requirement(pt_version: Optional[str] = "") -> dict:
    """Get PyTorch requirement when PT is not installed.

    If pt_version is an empty string and the environment variable
    `PYTORCH_VERSION` is set, use it as the requirement.

    Parameters
    ----------
    pt_version : str, optional
        PT version. An empty string (the default) falls back to the
        `PYTORCH_VERSION` environment variable; ``None`` yields an empty
        requirement list.

    Returns
    -------
    dict
        PyTorch requirement, i.e. a dict mapping the extra name ``"torch"``
        to a list of requirement strings.
    """
    if pt_version is None:
        return {"torch": []}
    if pt_version == "":
        pt_version = os.environ.get("PYTORCH_VERSION", "")

    if pt_version == "":
        # no version is known: accept any PyTorch 2 release (incl. pre-releases)
        requirement = "torch>=2a"
    else:
        # uv has different local version behaviors, i.e. `==2.3.1` cannot match `==2.3.1+cpu`
        # https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#local-version-identifiers
        # luckily, .* (prefix matching) defined in PEP 440 can match any local version
        # https://peps.python.org/pep-0440/#version-matching
        requirement = f"torch=={Version(pt_version).base_version}.*"
    return {"torch": [requirement]}
njzjz marked this conversation as resolved.
Show resolved Hide resolved


@lru_cache
def get_pt_version(pt_path: Optional[Union[str, Path]]) -> str:
    """Get PT version from a PT Python library path.

    The version is read by executing the `version.py` file located in the
    given directory and returning its `__version__` attribute, so the
    library itself is not imported.

    Parameters
    ----------
    pt_path : str or Path, optional
        PT Python library path. ``None`` or an empty string means that
        no path is available.

    Returns
    -------
    str
        version; an empty string if pt_path is ``None`` or empty.
    """
    # `import importlib` alone does not guarantee that the `util` submodule
    # is bound as an attribute, so import the submodule explicitly
    import importlib.util

    if pt_path is None or pt_path == "":
        return ""
    version_file = Path(pt_path) / "version.py"
    spec = importlib.util.spec_from_file_location("torch.version", version_file)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.__version__
14 changes: 9 additions & 5 deletions backend/read_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from .find_pytorch import (
find_pytorch,
get_pt_version,
)
from .find_tensorflow import (
find_tensorflow,
Expand All @@ -23,7 +24,7 @@


@lru_cache
def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
def get_argument_from_env() -> Tuple[str, list, list, dict, str, str]:
njzjz marked this conversation as resolved.
Show resolved Hide resolved
"""Get the arguments from environment variables.

The environment variables are assumed to be not changed during the build.
Expand All @@ -40,6 +41,8 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
The extra scripts to be installed.
str
The TensorFlow version.
str
The PyTorch version.
"""
cmake_args = []
extra_scripts = {}
Expand Down Expand Up @@ -103,9 +106,8 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
tf_version = None

if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1":
pt_install_dir = find_pytorch()
if pt_install_dir is None:
raise RuntimeError("Cannot find installed PyTorch.")
pt_install_dir, _ = find_pytorch()
pt_version = get_pt_version(pt_install_dir)
njzjz marked this conversation as resolved.
Show resolved Hide resolved
cmake_args.extend(
[
"-DENABLE_PYTORCH=ON",
Expand All @@ -114,6 +116,7 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
)
else:
cmake_args.append("-DENABLE_PYTORCH=OFF")
pt_version = None

cmake_args = [
"-DBUILD_PY_IF:BOOL=TRUE",
Expand All @@ -125,11 +128,12 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
find_libpython_requires,
extra_scripts,
tf_version,
pt_version,
)


def set_scikit_build_env():
"""Set scikit-build environment variables before executing scikit-build."""
cmake_minimum_required_version, cmake_args, _, _, _ = get_argument_from_env()
cmake_minimum_required_version, cmake_args, _, _, _, _ = get_argument_from_env()
os.environ["SKBUILD_CMAKE_MINIMUM_VERSION"] = cmake_minimum_required_version
os.environ["SKBUILD_CMAKE_ARGS"] = ";".join(cmake_args)
6 changes: 5 additions & 1 deletion doc/install/easy-install.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@ pip install deepmd-kit[cpu]
pip install deepmd-kit[gpu,cu12,torch,lmp,ipi]
```

MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.)
MPICH is required for parallel running.

:::{Warning}
When installing from pip, only the TensorFlow {{ tensorflow_icon }} backend is supported with LAMMPS and i-PI.
:::

It is suggested to install the package into an isolated environment.
The supported platforms include Linux x86-64 and aarch64 with GNU C Library 2.28 or above, macOS x86-64 and arm64, and Windows x86-64.
Expand Down
47 changes: 36 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,6 @@ cu12 = [
"nvidia-cudnn-cu12<9",
"nvidia-cuda-nvcc-cu12",
]
torch = [
"torch>=2a",
]

[tool.deepmd_build_backend.scripts]
dp = "deepmd.main:main"
Expand Down Expand Up @@ -198,56 +195,84 @@ replacement = '\1="https://github.com/deepmodeling/deepmd-kit/raw/master/\g<2>"'
[tool.cibuildwheel]
test-command = [
"python -m deepmd -h",
"""python -c "import deepmd.tf;import deepmd.pt" """,
njzjz marked this conversation as resolved.
Show resolved Hide resolved
"dp -h",
"dp_ipi",
"pytest {project}/source/tests/tf/test_lammps.py"
]
test-extras = ["cpu", "test", "lmp", "ipi"]
build = ["cp310-*"]
test-extras = ["cpu", "test", "lmp", "ipi", "torch"]
build = ["cp311-*"]
skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"]
# TODO: uncomment to use the latest image when CUDA 11 is deprecated
# manylinux-x86_64-image = "manylinux_2_28"
manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81"
manylinux-aarch64-image = "manylinux_2_28"

[tool.cibuildwheel.macos]
environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1" }
before-all = [
"""brew install mpich""",
'''pip install -i https://pypi.anaconda.org/mpi4py/simple mpich''',
njzjz marked this conversation as resolved.
Show resolved Hide resolved
]
repair-wheel-command = """delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} --ignore-missing-dependencies"""

[tool.cibuildwheel.macos.environment]
PIP_PREFER_BINARY = "1"
DP_LAMMPS_VERSION = "stable_2Aug2023_update3"
DP_ENABLE_IPI = "1"
DP_ENABLE_PYTORCH = "1"
# for unclear reason, when enabling PyTorch, OpenMP is found accidentally
CMAKE_ARGS = "-DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=1"

[[tool.cibuildwheel.overrides]]
# error: 'value' is unavailable: introduced in macOS 10.13
select = "*-macosx_x86_64"
inherit.environment = "append"
environment.MACOSX_DEPLOYMENT_TARGET = "10.13"

[tool.cibuildwheel.linux]
repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}"
repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 --exclude libc10.so --exclude libtorch.so --exclude libtorch_cpu.so -w {dest_dir} {wheel}"
environment-pass = [
"CIBW_BUILD",
"DP_VARIANT",
"CUDA_VERSION",
"DP_PKG_NAME",
"SETUPTOOLS_SCM_PRETEND_VERSION",
]
environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
before-all = [
"""if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""",
# https://almalinux.org/blog/2023-12-20-almalinux-8-key-update/
"""rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux""",
"""{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""",
"yum install -y mpich-devel",
'''/opt/python/cp311-cp311/bin/python -m pip install -i https://pypi.anaconda.org/mpi4py/simple mpich''',
# uv is not available in the old manylinux image
"""{ if [ "$(uname -m)" = "x86_64" ] ; then pipx install uv; fi }""",
]
before-build = [
# old build doesn't support uv
"""{ if [ "$(uname -m)" = "x86_64" ] ; then uv pip install --system -U build; fi }""",
]
[tool.cibuildwheel.linux.environment]
PIP_PREFER_BINARY = "1"
DP_LAMMPS_VERSION = "stable_2Aug2023_update3"
DP_ENABLE_IPI = "1"
DP_ENABLE_PYTORCH = "1"
MPI_HOME = "/usr/lib64/mpich"
PATH = "/usr/lib64/mpich/bin:$PATH"
# use CPU version of torch for building, which should also work for GPU
# note: uv has different behavior from pip on extra index url
# https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#packages-that-exist-on-multiple-indexes
UV_EXTRA_INDEX_URL = "https://download.pytorch.org/whl/cpu"
# trick to find the correct version of mpich
CMAKE_PREFIX_PATH="/opt/python/cp311-cp311/"

[tool.cibuildwheel.windows]
environment = { PIP_PREFER_BINARY="1" }
test-extras = ["cpu"]
test-command = [
"python -m deepmd -h",
"dp -h",
]
[tool.cibuildwheel.windows.environment]
PIP_PREFER_BINARY = "1"
DP_ENABLE_PYTORCH = "1"

# One can run `tox` or `tox -e gpu`
# to run pytest in an isolated environment
Expand Down
5 changes: 4 additions & 1 deletion source/api_cc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ if(ENABLE_TENSORFLOW)
TensorFlow::tensorflow_framework)
target_compile_definitions(${libname} PRIVATE BUILD_TENSORFLOW)
endif()
if(ENABLE_PYTORCH AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}")
if(ENABLE_PYTORCH
AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}"
# LAMMPS and i-PI in the Python package are not ready - needs more work
AND NOT BUILD_PY_IF)
target_link_libraries(${libname} PRIVATE "${TORCH_LIBRARIES}")
target_compile_definitions(${libname} PRIVATE BUILD_PYTORCH)
endif()
Expand Down
Loading