Merge branch 'devel' into speed-up-test-cuda

Signed-off-by: Jinzhe Zeng <[email protected]>
deepmodeling · Feb 17, 2024 · 40e597e · 40e597e
2 parents ae1c3f5 + cd1a957
commit 40e597e
Show file tree

Hide file tree

Showing 25 changed files with 362 additions and 117 deletions.
diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml
@@ -1,6 +1,10 @@
 on:
   push:
   pull_request:
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 name: Build C++
 jobs:
   buildcc:

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
@@ -3,6 +3,11 @@ name: Build and upload to PyPI
 on:
   push:
   pull_request:
+  merge_group:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 
 jobs:
   determine-arm64-runner:

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -5,7 +5,9 @@ on:
   pull_request:
   schedule:
     - cron: '45 2 * * 2'
-
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 jobs:
   analyze:
     name: Analyze

diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
@@ -11,4 +11,4 @@ jobs:
     steps:
     - uses: actions/labeler@v5
       with:
-        repo-token: "${{ secrets.GITHUB_TOKEN }}"
+        repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml
@@ -3,7 +3,10 @@ name: Build C library
 on:
   push:
   pull_request:
-
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 jobs:
   build_c:
     name: Build C library

diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml
@@ -1,6 +1,10 @@
 on:
   push:
   pull_request:
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 name: Test C++
 jobs:
   testcc:

diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
@@ -4,6 +4,12 @@ on:
   pull_request:
     types:
       - "labeled"
+      # to let the PR pass the test
+      - "synchronize"
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 name: Test CUDA
 jobs:
   test_cuda:
@@ -13,7 +19,7 @@ jobs:
     container:
       image: nvidia/cuda:12.3.1-devel-ubuntu22.04
       options: --gpus all
-    if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch'
+    if: github.repository_owner == 'deepmodeling' && (github.event_name == 'pull_request' && github.event.label && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' || github.event_name == 'merge_group')
     steps:
     - name: Make sudo and git work
       run: apt-get update && apt-get install -y sudo git
@@ -64,3 +70,13 @@ jobs:
         TF_INTRA_OP_PARALLELISM_THREADS: 1
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
+  pass:
+    name: Pass testing on CUDA
+    needs: [test_cuda]
+    runs-on: ubuntu-latest
+    if: always()
+    steps:
+    - name: Decide whether the needed jobs succeeded or failed
+      uses: re-actors/alls-green@release/v1
+      with:
+        jobs: ${{ toJSON(needs) }}
diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
@@ -1,6 +1,10 @@
 on:
   push:
   pull_request:
+  merge_group:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: true
 name: Test Python
 jobs:
   testpython:
@@ -27,8 +31,6 @@ jobs:
         mpi: openmpi
     # https://github.com/pypa/pip/issues/11770
     - run: python -m pip install -U "pip>=21.3.1,!=23.0.0"
-    - run: python -m pip install -U "torch==${{ matrix.torch }}" "numpy<1.20"
-      if: matrix.torch != ''
     - run: pip install -e .[cpu,test,torch]
       env:
         TENSORFLOW_VERSION: ${{ matrix.tf }}

diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py
@@ -22,14 +22,12 @@
     EmbeddingNet,
     EnvMat,
     NetworkCollection,
+    PairExcludeMask,
 )
 
 from .base_descriptor import (
     BaseDescriptor,
 )
-from .exclude_mask import (
-    ExcludeMask,
-)
 
 
 class DescrptSeA(NativeOP, BaseDescriptor):
@@ -160,7 +158,7 @@ def __init__(
         self.activation_function = activation_function
         self.precision = precision
         self.spin = spin
-        self.emask = ExcludeMask(self.ntypes, self.exclude_types)
+        self.emask = PairExcludeMask(self.ntypes, self.exclude_types)
 
         in_dim = 1  # not considiering type embedding
         self.embeddings = NetworkCollection(

diff --git a/deepmd/dpmodel/fitting/invar_fitting.py b/deepmd/dpmodel/fitting/invar_fitting.py
@@ -19,6 +19,7 @@
     fitting_check_output,
 )
 from deepmd.dpmodel.utils import (
+    AtomExcludeMask,
     FittingNet,
     NetworkCollection,
 )
@@ -126,6 +127,7 @@ def __init__(
         use_aparam_as_mask: bool = False,
         spin: Any = None,
         distinguish_types: bool = False,
+        exclude_types: List[int] = [],
     ):
         # seed, uniform_seed are not included
         if tot_ener_zero:
@@ -159,8 +161,10 @@ def __init__(
         self.use_aparam_as_mask = use_aparam_as_mask
         self.spin = spin
         self.distinguish_types = distinguish_types
+        self.exclude_types = exclude_types
         if self.spin is not None:
             raise NotImplementedError("spin is not supported")
+        self.emask = AtomExcludeMask(self.ntypes, exclude_types=self.exclude_types)
 
         # init constants
         self.bias_atom_e = np.zeros([self.ntypes, self.dim_out])
@@ -260,6 +264,7 @@ def serialize(self) -> dict:
             "precision": self.precision,
             "distinguish_types": self.distinguish_types,
             "nets": self.nets.serialize(),
+            "exclude_types": self.exclude_types,
             "@variables": {
                 "bias_atom_e": self.bias_atom_e,
                 "fparam_avg": self.fparam_avg,
@@ -370,4 +375,10 @@ def call(
                 outs = outs + atom_energy  # Shape is [nframes, natoms[0], 1]
         else:
             outs = self.nets[()](xx) + self.bias_atom_e[atype]
+
+        # nf x nloc
+        exclude_mask = self.emask.build_type_exclude_mask(atype)
+        # nf x nloc x nod
+        outs = outs * exclude_mask[:, :, None]
+
         return {self.var_name: outs}
diff --git a/deepmd/dpmodel/utils/__init__.py b/deepmd/dpmodel/utils/__init__.py
@@ -2,6 +2,10 @@
 from .env_mat import (
     EnvMat,
 )
+from .exclude_mask import (
+    AtomExcludeMask,
+    PairExcludeMask,
+)
 from .network import (
     EmbeddingNet,
     FittingNet,
@@ -53,4 +57,6 @@
     "inter2phys",
     "phys2inter",
     "to_face_distance",
+    "AtomExcludeMask",
+    "PairExcludeMask",
 ]
diff --git a/deepmd/dpmodel/descriptor/exclude_mask.py → deepmd/dpmodel/utils/exclude_mask.py b/deepmd/dpmodel/descriptor/exclude_mask.py → deepmd/dpmodel/utils/exclude_mask.py
@@ -7,15 +7,54 @@
 import numpy as np
 
 
-class ExcludeMask:
-    """Computes the atom type exclusion mask."""
+class AtomExcludeMask:
+    """Computes the type exclusion mask for atoms."""
+
+    def __init__(
+        self,
+        ntypes: int,
+        exclude_types: List[int] = [],
+    ):
+        self.ntypes = ntypes
+        self.exclude_types = exclude_types
+        self.type_mask = np.array(
+            [1 if tt_i not in self.exclude_types else 0 for tt_i in range(ntypes)],
+            dtype=np.int32,
+        )
+        # (ntypes)
+        self.type_mask = self.type_mask.reshape([-1])
+
+    def build_type_exclude_mask(
+        self,
+        atype: np.ndarray,
+    ):
+        """Compute type exclusion mask for atoms.
+
+        Parameters
+        ----------
+        atype
+            The extended atom types. shape: nf x natom
+
+        Returns
+        -------
+        mask
+            The type exclusion mask for atoms. shape: nf x natom
+            Element [ff,ii] being 0 if type(ii) is excluded,
+            otherwise being 1.
+
+        """
+        nf, natom = atype.shape
+        return self.type_mask[atype].reshape(nf, natom)
+
+
+class PairExcludeMask:
+    """Computes the type exclusion mask for atom pairs."""
 
     def __init__(
         self,
         ntypes: int,
         exclude_types: List[Tuple[int, int]] = [],
     ):
-        super().__init__()
         self.ntypes = ntypes
         self.exclude_types = set()
         for tt in exclude_types:
@@ -41,7 +80,7 @@ def build_type_exclude_mask(
         nlist: np.ndarray,
         atype_ext: np.ndarray,
     ):
-        """Compute type exclusion mask.
+        """Compute type exclusion mask for atom pairs.
 
         Parameters
         ----------
@@ -53,7 +92,7 @@ def build_type_exclude_mask(
         Returns
         -------
         mask
-            The type exclusion mask of shape: nf x nloc x nnei.
+            The type exclusion mask for atom pairs of shape: nf x nloc x nnei.
             Element [ff,ii,jj] being 0 if type(ii), type(nlist[ff,ii,jj]) is excluded,
             otherwise being 1.
 

diff --git a/deepmd/pt/model/descriptor/descriptor.py b/deepmd/pt/model/descriptor/descriptor.py
@@ -8,8 +8,6 @@
     Callable,
     List,
     Optional,
-    Set,
-    Tuple,
     Union,
 )
 
@@ -22,9 +20,6 @@
 from deepmd.pt.utils.plugin import (
     Plugin,
 )
-from deepmd.pt.utils.utils import (
-    to_torch_tensor,
-)
 
 from .base_descriptor import (
     BaseDescriptor,
@@ -211,32 +206,6 @@ class DescriptorBlock(torch.nn.Module, ABC):
     __plugins = Plugin()
     local_cluster = False
 
-    def __init__(
-        self,
-        ntypes: int,
-        exclude_types: List[Tuple[int, int]] = [],
-    ):
-        super().__init__()
-        _exclude_types: Set[Tuple[int, int]] = set()
-        for tt in exclude_types:
-            assert len(tt) == 2
-            _exclude_types.add((tt[0], tt[1]))
-            _exclude_types.add((tt[1], tt[0]))
-        # ntypes + 1 for nlist masks
-        self.type_mask = np.array(
-            [
-                [
-                    1 if (tt_i, tt_j) not in _exclude_types else 0
-                    for tt_i in range(ntypes + 1)
-                ]
-                for tt_j in range(ntypes + 1)
-            ],
-            dtype=np.int32,
-        )
-        # (ntypes+1 x ntypes+1)
-        self.type_mask = to_torch_tensor(self.type_mask).view([-1])
-        self.no_exclusion = len(_exclude_types) == 0
-
     @staticmethod
     def register(key: str) -> Callable:
         """Register a DescriptorBlock plugin.
@@ -365,53 +334,6 @@ def forward(
         """Calculate DescriptorBlock."""
         pass
 
-    # may have a better place for this method...
-    def build_type_exclude_mask(
-        self,
-        nlist: torch.Tensor,
-        atype_ext: torch.Tensor,
-    ) -> torch.Tensor:
-        """Compute type exclusion mask.
-
-        Parameters
-        ----------
-        nlist
-            The neighbor list. shape: nf x nloc x nnei
-        atype_ext
-            The extended aotm types. shape: nf x nall
-
-        Returns
-        -------
-        mask
-            The type exclusion mask of shape: nf x nloc x nnei.
-            Element [ff,ii,jj] being 0 if type(ii), type(nlist[ff,ii,jj]) is excluded,
-            otherwise being 1.
-
-        """
-        if self.no_exclusion:
-            # safely return 1 if nothing is excluded.
-            return torch.ones_like(nlist, dtype=torch.int32, device=nlist.device)
-        nf, nloc, nnei = nlist.shape
-        nall = atype_ext.shape[1]
-        # add virtual atom of type ntypes. nf x nall+1
-        ae = torch.cat(
-            [
-                atype_ext,
-                self.get_ntypes()
-                * torch.ones([nf, 1], dtype=atype_ext.dtype, device=atype_ext.device),
-            ],
-            dim=-1,
-        )
-        type_i = atype_ext[:, :nloc].view(nf, nloc) * (self.get_ntypes() + 1)
-        # nf x nloc x nnei
-        index = torch.where(nlist == -1, nall, nlist).view(nf, nloc * nnei)
-        type_j = torch.gather(ae, 1, index).view(nf, nloc, nnei)
-        type_ij = type_i[:, :, None] + type_j
-        # nf x (nloc x nnei)
-        type_ij = type_ij.view(nf, nloc * nnei)
-        mask = self.type_mask[type_ij].view(nf, nloc, nnei)
-        return mask
-
 
 def compute_std(sumv2, sumv, sumn, rcut_r):
     """Compute standard deviation."""

diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py
@@ -32,7 +32,7 @@ def __init__(
         - descriptor_list: list of descriptors.
         - descriptor_param: descriptor configs.
         """
-        super().__init__(ntypes)
+        super().__init__()
         supported_descrpt = ["se_atten", "se_uni"]
         descriptor_list = []
         for descriptor_param_item in list: