deepmodeling · wanghan-iapcm · Jun 23, 2024 · Jun 20, 2024 · Jun 20, 2024 · Jun 21, 2024
diff --git a/backend/dp_backend.py b/backend/dp_backend.py
@@ -7,6 +7,9 @@
 
 from scikit_build_core import build as _orig
 
+from .find_pytorch import (
+    find_pytorch,
+)
 from .find_tensorflow import (
     find_tensorflow,
 )
@@ -40,10 +43,18 @@ def __dir__() -> List[str]:
 def get_requires_for_build_wheel(
     config_settings: dict,
 ) -> List[str]:
-    return _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1]
+    return (
+        _orig.get_requires_for_build_wheel(config_settings)
+        + find_tensorflow()[1]
+        + find_pytorch()[1]
+    )
 
 
 def get_requires_for_build_editable(
     config_settings: dict,
 ) -> List[str]:
-    return _orig.get_requires_for_build_editable(config_settings) + find_tensorflow()[1]
+    return (
+        _orig.get_requires_for_build_editable(config_settings)
+        + find_tensorflow()[1]
+        + find_pytorch()[1]
+    )
diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py
@@ -9,6 +9,9 @@
     Optional,
 )
 
+from .find_pytorch import (
+    get_pt_requirement,
+)
 from .find_tensorflow import (
     get_tf_requirement,
 )
@@ -33,7 +36,9 @@ def dynamic_metadata(
     settings: Optional[Dict[str, object]] = None,
 ):
     assert field in ["optional-dependencies", "entry-points", "scripts"]
-    _, _, find_libpython_requires, extra_scripts, tf_version = get_argument_from_env()
+    _, _, find_libpython_requires, extra_scripts, tf_version, pt_version = (
+        get_argument_from_env()
+    )
     with Path("pyproject.toml").open("rb") as f:
         pyproject = tomllib.load(f)
 
@@ -51,4 +56,5 @@ def dynamic_metadata(
         return {
             **optional_dependencies,
             **get_tf_requirement(tf_version),
+            **get_pt_requirement(pt_version),
         }
diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import importlib
 import os
 import site
 from functools import (
@@ -17,12 +18,19 @@
     get_path,
 )
 from typing import (
+    List,
     Optional,
+    Tuple,
+    Union,
+)
+
+from packaging.version import (
+    Version,
 )
 
 
 @lru_cache
-def find_pytorch() -> Optional[str]:
+def find_pytorch() -> Tuple[Optional[str], List[str]]:
     """Find PyTorch library.
 
     Tries to find PyTorch in the order of:
@@ -39,9 +47,12 @@ def find_pytorch() -> Optional[str]:
     -------
     str, optional
         PyTorch library path if found.
+    list of str
+        PyTorch requirement if not found. Empty if found.
     """
     if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0":
-        return None
+        return None, []
+    requires = []
     pt_spec = None
 
     if (pt_spec is None or not pt_spec) and os.environ.get("PYTORCH_ROOT") is not None:
@@ -73,4 +84,62 @@ def find_pytorch() -> Optional[str]:
         # IndexError if submodule_search_locations is an empty list
     except (AttributeError, TypeError, IndexError):
         pt_install_dir = None
-    return pt_install_dir
+        requires.extend(get_pt_requirement()["torch"])
+    return pt_install_dir, requires
+
+
+@lru_cache
+def get_pt_requirement(pt_version: str = "") -> dict:
+    """Get PyTorch requirement when PT is not installed.
+
+    If pt_version is not given and the environment variable `PYTORCH_VERSION` is set, use it as the requirement.
+
+    Parameters
+    ----------
+    pt_version : str, optional
+        PT version
+
+    Returns
+    -------
+    dict
+        PyTorch requirement.
+    """
+    if pt_version is None:
+        return {"torch": []}
+    if pt_version == "":
+        pt_version = os.environ.get("PYTORCH_VERSION", "")
+
+    return {
+        "torch": [
+            # uv has different local version behaviors, i.e. `==2.3.1` cannot match `==2.3.1+cpu`
+            # https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#local-version-identifiers
+            # luckily, .* (prefix matching) defined in PEP 440 can match any local version
+            # https://peps.python.org/pep-0440/#version-matching
+            f"torch=={Version(pt_version).base_version}.*"
+            if pt_version != ""
+            else "torch>=2a",
+        ],
+    }
+
+
+@lru_cache
+def get_pt_version(pt_path: Union[str, Path]) -> str:
+    """Get PT version from a PT Python library path.
+
+    Parameters
+    ----------
+    pt_path : str or Path
+        PT Python library path
+
+    Returns
+    -------
+    str
+        version
+    """
+    if pt_path is None or pt_path == "":
+        return ""
+    version_file = Path(pt_path) / "version.py"
+    spec = importlib.util.spec_from_file_location("torch.version", version_file)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module.__version__
diff --git a/backend/read_env.py b/backend/read_env.py
@@ -15,6 +15,7 @@
 
 from .find_pytorch import (
     find_pytorch,
+    get_pt_version,
 )
 from .find_tensorflow import (
     find_tensorflow,
@@ -23,7 +24,7 @@
 
 
 @lru_cache
-def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
+def get_argument_from_env() -> Tuple[str, list, list, dict, str, str]:
     """Get the arguments from environment variables.
 
     The environment variables are assumed to be not changed during the build.
@@ -40,6 +41,8 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
         The extra scripts to be installed.
     str
         The TensorFlow version.
+    str
+        The PyTorch version.
     """
     cmake_args = []
     extra_scripts = {}
@@ -103,9 +106,8 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
         tf_version = None
 
     if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1":
-        pt_install_dir = find_pytorch()
-        if pt_install_dir is None:
-            raise RuntimeError("Cannot find installed PyTorch.")
+        pt_install_dir, _ = find_pytorch()
+        pt_version = get_pt_version(pt_install_dir)
         cmake_args.extend(
             [
                 "-DENABLE_PYTORCH=ON",
@@ -114,6 +116,7 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
         )
     else:
         cmake_args.append("-DENABLE_PYTORCH=OFF")
+        pt_version = None
 
     cmake_args = [
         "-DBUILD_PY_IF:BOOL=TRUE",
@@ -125,11 +128,12 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
         find_libpython_requires,
         extra_scripts,
         tf_version,
+        pt_version,
     )
 
 
 def set_scikit_build_env():
     """Set scikit-build environment variables before executing scikit-build."""
-    cmake_minimum_required_version, cmake_args, _, _, _ = get_argument_from_env()
+    cmake_minimum_required_version, cmake_args, _, _, _, _ = get_argument_from_env()
     os.environ["SKBUILD_CMAKE_MINIMUM_VERSION"] = cmake_minimum_required_version
     os.environ["SKBUILD_CMAKE_ARGS"] = ";".join(cmake_args)
diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md
@@ -132,7 +132,11 @@ pip install deepmd-kit[cpu]
 pip install deepmd-kit[gpu,cu12,torch,lmp,ipi]
 ```
 
-MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.)
+MPICH is required for parallel running.
+
+:::{Warning}
+When installing from pip, only the TensorFlow {{ tensorflow_icon }} backend is supported with LAMMPS and i-PI.
+:::
 
 It is suggested to install the package into an isolated environment.
 The supported platform includes Linux x86-64 and aarch64 with GNU C Library 2.28 or above, macOS x86-64 and arm64, and Windows x86-64.

diff --git a/pyproject.toml b/pyproject.toml
@@ -126,9 +126,6 @@ cu12 = [
     "nvidia-cudnn-cu12<9",
     "nvidia-cuda-nvcc-cu12",
 ]
-torch = [
-    "torch>=2a",
-]
 
 [tool.deepmd_build_backend.scripts]
 dp = "deepmd.main:main"
@@ -198,56 +195,84 @@ replacement = '\1="https://github.com/deepmodeling/deepmd-kit/raw/master/\g<2>"'
 [tool.cibuildwheel]
 test-command = [
     "python -m deepmd -h",
+    """python -c "import deepmd.tf;import deepmd.pt" """,
     "dp -h",
     "dp_ipi",
     "pytest {project}/source/tests/tf/test_lammps.py"
 ]
-test-extras = ["cpu", "test", "lmp", "ipi"]
-build = ["cp310-*"]
+test-extras = ["cpu", "test", "lmp", "ipi", "torch"]
+build = ["cp311-*"]
 skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"]
 # TODO: uncomment to use the latest image when CUDA 11 is deprecated
 # manylinux-x86_64-image = "manylinux_2_28"
 manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81"
 manylinux-aarch64-image = "manylinux_2_28"
 
 [tool.cibuildwheel.macos]
-environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1" }
 before-all = [
-    """brew install mpich""",
+    '''pip install -i https://pypi.anaconda.org/mpi4py/simple mpich''',
 ]
 repair-wheel-command = """delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} --ignore-missing-dependencies"""
 
+[tool.cibuildwheel.macos.environment]
+PIP_PREFER_BINARY = "1"
+DP_LAMMPS_VERSION = "stable_2Aug2023_update3"
+DP_ENABLE_IPI = "1"
+DP_ENABLE_PYTORCH = "1"
+# for unclear reasons, when enabling PyTorch, OpenMP is found accidentally
+CMAKE_ARGS = "-DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=1"
+
+[[tool.cibuildwheel.overrides]]
+# error: 'value' is unavailable: introduced in macOS 10.13
+select = "*-macosx_x86_64"
+inherit.environment = "append"
+environment.MACOSX_DEPLOYMENT_TARGET = "10.13"
+
 [tool.cibuildwheel.linux]
-repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}"
+repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 --exclude libc10.so --exclude libtorch.so --exclude libtorch_cpu.so -w {dest_dir} {wheel}"
 environment-pass = [
     "CIBW_BUILD",
     "DP_VARIANT",
     "CUDA_VERSION",
     "DP_PKG_NAME",
     "SETUPTOOLS_SCM_PRETEND_VERSION",
 ]
-environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" }
 before-all = [
     """if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""",
     # https://almalinux.org/blog/2023-12-20-almalinux-8-key-update/
     """rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux""",
     """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""",
-    "yum install -y mpich-devel",
+    '''/opt/python/cp311-cp311/bin/python -m pip install -i https://pypi.anaconda.org/mpi4py/simple mpich''',
     # uv is not available in the old manylinux image
     """{ if [ "$(uname -m)" = "x86_64" ] ; then pipx install uv; fi }""",
 ]
 before-build = [
     # old build doesn't support uv
     """{ if [ "$(uname -m)" = "x86_64" ] ; then uv pip install --system -U build; fi }""",
 ]
+[tool.cibuildwheel.linux.environment]
+PIP_PREFER_BINARY = "1"
+DP_LAMMPS_VERSION = "stable_2Aug2023_update3"
+DP_ENABLE_IPI = "1"
+DP_ENABLE_PYTORCH = "1"
+MPI_HOME = "/usr/lib64/mpich"
+PATH = "/usr/lib64/mpich/bin:$PATH"
+# use CPU version of torch for building, which should also work for GPU
+# note: uv has different behavior from pip on extra index url
+# https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#packages-that-exist-on-multiple-indexes
+UV_EXTRA_INDEX_URL = "https://download.pytorch.org/whl/cpu"
+# trick to find the correct version of mpich
+CMAKE_PREFIX_PATH="/opt/python/cp311-cp311/"
 
 [tool.cibuildwheel.windows]
-environment = { PIP_PREFER_BINARY="1" }
 test-extras = ["cpu"]
 test-command = [
     "python -m deepmd -h",
     "dp -h",
 ]
+[tool.cibuildwheel.windows.environment]
+PIP_PREFER_BINARY = "1"
+DP_ENABLE_PYTORCH = "1"
 
 # One can run `tox` or `tox -e gpu`
 # to run pytest in an isolated environment

diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt
@@ -16,7 +16,10 @@ if(ENABLE_TENSORFLOW)
                                            TensorFlow::tensorflow_framework)
   target_compile_definitions(${libname} PRIVATE BUILD_TENSORFLOW)
 endif()
-if(ENABLE_PYTORCH AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}")
+if(ENABLE_PYTORCH
+   AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}"
+       # LAMMPS and i-PI in the Python package are not ready - needs more work
+   AND NOT BUILD_PY_IF)
   target_link_libraries(${libname} PRIVATE "${TORCH_LIBRARIES}")
   target_compile_definitions(${libname} PRIVATE BUILD_PYTORCH)
 endif()