rapidsai · rapids-bot · Mar 11, 2024 · Mar 2, 2024 · Mar 6, 2024 · Mar 7, 2024
@@ -186,6 +186,10 @@ def forward(
         nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]],
         efeat: Optional[torch.Tensor] = None,
         max_in_degree: Optional[int] = None,
+        deterministic_dgrad: bool = False,
+        deterministic_wgrad: bool = False,
+        high_precision_dgrad: bool = False,
+        high_precision_wgrad: bool = False,
     ) -> torch.Tensor:
         r"""Forward computation.
 
@@ -204,6 +208,20 @@ def forward(
             from a neighbor sampler, the value should be set to the corresponding
             :attr:`fanout`. This option is used to invoke the MFG-variant of
             cugraph-ops kernel.
+        deterministic_dgrad : bool, default=False
+            Optional flag indicating whether the feature gradients
+            are computed deterministically using a dedicated workspace buffer.
+        deterministic_wgrad : bool, default=False
+            Optional flag indicating whether the weight gradients
+            are computed deterministically using a dedicated workspace buffer.
+        high_precision_dgrad : bool, default=False
+            Optional flag indicating whether gradients for inputs in half precision
+            are kept in single precision as long as possible and only cast to
+            the corresponding input type at the very end.
+        high_precision_wgrad : bool, default=False
+            Optional flag indicating whether gradients for weights in half precision
+            are kept in single precision as long as possible and only cast to
+            the corresponding input type at the very end.
 
         Returns
         -------
@@ -232,6 +250,8 @@ def forward(
         _graph = self.get_cugraph_ops_CSC(
             g, is_bipartite=bipartite, max_in_degree=max_in_degree
         )
+        if deterministic_dgrad:
+            _graph.add_reverse_graph()
 
         if bipartite:
             nfeat = (self.feat_drop(nfeat[0]), self.feat_drop(nfeat[1]))
@@ -273,6 +293,10 @@ def forward(
             negative_slope=self.negative_slope,
             concat_heads=self.concat,
             edge_feat=efeat,
+            deterministic_dgrad=deterministic_dgrad,
+            deterministic_wgrad=deterministic_wgrad,
+            high_precision_dgrad=high_precision_dgrad,
+            high_precision_wgrad=high_precision_wgrad,
         )[: g.num_dst_nodes()]
 
         if self.concat:

@@ -150,6 +150,8 @@ def forward(
         nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]],
         efeat: Optional[torch.Tensor] = None,
         max_in_degree: Optional[int] = None,
+        deterministic_dgrad: bool = False,
+        deterministic_wgrad: bool = False,
     ) -> torch.Tensor:
         r"""Forward computation.
 
@@ -166,6 +168,12 @@ def forward(
             from a neighbor sampler, the value should be set to the corresponding
             :attr:`fanout`. This option is used to invoke the MFG-variant of
             cugraph-ops kernel.
+        deterministic_dgrad : bool, default=False
+            Optional flag indicating whether the feature gradients
+            are computed deterministically using a dedicated workspace buffer.
+        deterministic_wgrad : bool, default=False
+            Optional flag indicating whether the weight gradients
+            are computed deterministically using a dedicated workspace buffer.
 
         Returns
         -------
@@ -196,6 +204,8 @@ def forward(
         _graph = self.get_cugraph_ops_CSC(
             g, is_bipartite=graph_bipartite, max_in_degree=max_in_degree
         )
+        if deterministic_dgrad:
+            _graph.add_reverse_graph()
 
         if nfeat_bipartite:
             nfeat = (self.feat_drop(nfeat[0]), self.feat_drop(nfeat[1]))
@@ -228,6 +238,8 @@ def forward(
             negative_slope=self.negative_slope,
             concat_heads=self.concat,
             edge_feat=efeat,
+            deterministic_dgrad=deterministic_dgrad,
+            deterministic_wgrad=deterministic_wgrad,
         )[: g.num_dst_nodes()]
 
         if self.concat:

@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -162,6 +162,10 @@ def forward(
         csc: Tuple[torch.Tensor, torch.Tensor, int],
         edge_attr: Optional[torch.Tensor] = None,
         max_num_neighbors: Optional[int] = None,
+        deterministic_dgrad: bool = False,
+        deterministic_wgrad: bool = False,
+        high_precision_dgrad: bool = False,
+        high_precision_wgrad: bool = False,
     ) -> torch.Tensor:
         r"""Runs the forward pass of the module.
 
@@ -178,11 +182,27 @@ def forward(
                 of a destination node. When enabled, it allows models to use
                 the message-flow-graph primitives in cugraph-ops.
                 (default: :obj:`None`)
+            deterministic_dgrad: (bool, optional) Whether the feature
+                gradients are computed deterministically using a dedicated
+                workspace buffer. (default: :obj:`False`)
+            deterministic_wgrad: (bool, optional) Whether the weight
+                gradients are computed deterministically using a dedicated
+                workspace buffer. (default: :obj:`False`)
+            high_precision_dgrad: (bool, optional) Whether gradients for
+                inputs in half precision are kept in single precision as
+                long as possible and only cast to the corresponding input
+                type at the very end. (default: :obj:`False`)
+            high_precision_wgrad: (bool, optional) Whether gradients for
+                weights in half precision are kept in single precision as
+                long as possible and only cast to the corresponding input
+                type at the very end. (default: :obj:`False`)
         """
         bipartite = not isinstance(x, torch.Tensor)
         graph = self.get_cugraph(
             csc, bipartite=bipartite, max_num_neighbors=max_num_neighbors
         )
+        if deterministic_dgrad:
+            graph.add_reverse_graph()
 
         if edge_attr is not None:
             if self.lin_edge is None:
@@ -220,6 +240,10 @@ def forward(
             negative_slope=self.negative_slope,
             concat_heads=self.concat,
             edge_feat=edge_attr,
+            deterministic_dgrad=deterministic_dgrad,
+            deterministic_wgrad=deterministic_wgrad,
+            high_precision_dgrad=high_precision_dgrad,
+            high_precision_wgrad=high_precision_wgrad,
         )
 
         if self.bias is not None:

@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -174,6 +174,8 @@ def forward(
         x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]],
         csc: Tuple[torch.Tensor, torch.Tensor, int],
         edge_attr: Optional[torch.Tensor] = None,
+        deterministic_dgrad: bool = False,
+        deterministic_wgrad: bool = False,
     ) -> torch.Tensor:
         r"""Runs the forward pass of the module.
 
@@ -186,9 +188,17 @@ def forward(
                 :meth:`to_csc` method to convert an :obj:`edge_index`
                 representation to the desired format.
             edge_attr: (torch.Tensor, optional) The edge features.
+            deterministic_dgrad: (bool, optional) Whether the feature
+                gradients are computed deterministically using a dedicated
+                workspace buffer. (default: :obj:`False`)
+            deterministic_wgrad: (bool, optional) Whether the weight
+                gradients are computed deterministically using a dedicated
+                workspace buffer. (default: :obj:`False`)
         """
         bipartite = not isinstance(x, torch.Tensor) or not self.share_weights
         graph = self.get_cugraph(csc, bipartite=bipartite)
+        if deterministic_dgrad:
+            graph.add_reverse_graph()
 
         if edge_attr is not None:
             if self.lin_edge is None:
@@ -217,6 +227,8 @@ def forward(
             negative_slope=self.negative_slope,
             concat_heads=self.concat,
             edge_feat=edge_attr,
+            deterministic_dgrad=deterministic_dgrad,
+            deterministic_wgrad=deterministic_wgrad,
         )
 
         if self.bias is not None: