From 86b20382681c4e8adb41904d17daf398a3f5f204 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 19:52:08 -0800
Subject: [PATCH 01/60] add support for rng state

---
 cpp/include/cugraph_c/sampling_algorithms.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h
index f048d338b9..ac029181ba 100644
--- a/cpp/include/cugraph_c/sampling_algorithms.h
+++ b/cpp/include/cugraph_c/sampling_algorithms.h
@@ -83,6 +83,7 @@ cugraph_error_code_t cugraph_biased_random_walks(
  * @brief  Compute random walks using the node2vec framework.
  *
  * @param [in]  handle          Handle for accessing resources
+ * @param [in,out] rng_state State of the random number generator, updated with each call
  * @param [in]  graph           Pointer to graph.  NOTE: Graph might be modified if the storage
  *                              needs to be transposed
  * @param [in]  start_vertices  Array of source vertices
@@ -98,6 +99,7 @@ cugraph_error_code_t cugraph_biased_random_walks(
  */
 cugraph_error_code_t cugraph_node2vec_random_walks(
   const cugraph_resource_handle_t* handle,
+  cugraph_rng_state_t* rng_state,
   cugraph_graph_t* graph,
   const cugraph_type_erased_device_array_view_t* start_vertices,
   size_t max_length,

From 38690a6e8096b7785649e4243d408abd51324708 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 19:56:03 -0800
Subject: [PATCH 02/60] update test to take rng state parameter

---
 cpp/tests/c_api/sg_random_walks_test.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cpp/tests/c_api/sg_random_walks_test.c b/cpp/tests/c_api/sg_random_walks_test.c
index 05d77a0b3b..71c76f3f94 100644
--- a/cpp/tests/c_api/sg_random_walks_test.c
+++ b/cpp/tests/c_api/sg_random_walks_test.c
@@ -308,8 +308,12 @@ int generic_node2vec_random_walks_test(vertex_t* h_src,
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
 
+  cugraph_rng_state_t* rng_state;
+  ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
+
   ret_code = cugraph_node2vec_random_walks(
-    handle, graph, d_start_view, max_depth, p, q, &result, &ret_error);
+    handle, rng_state, graph, d_start_view, max_depth, p, q, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "node2vec_random_walks failed.");

From fd5b387d91cbf3c6a6f33b84d2587a77aea6e273 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 19:57:20 -0800
Subject: [PATCH 03/60] add support for rng state

---
 cpp/src/c_api/random_walks.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp
index 705d210843..7d883df9dd 100644
--- a/cpp/src/c_api/random_walks.cpp
+++ b/cpp/src/c_api/random_walks.cpp
@@ -365,7 +365,6 @@ struct biased_random_walks_functor : public cugraph::c_api::abstract_functor {
 
 struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor {
   raft::handle_t const& handle_;
-  //  FIXME: rng_state_ should be passed as a parameter
   cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
   cugraph::c_api::cugraph_graph_t* graph_{nullptr};
   cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr};
@@ -375,6 +374,7 @@ struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor {
   cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr};
 
   node2vec_random_walks_functor(cugraph_resource_handle_t const* handle,
+                                cugraph_rng_state_t* rng_state,
                                 cugraph_graph_t* graph,
                                 cugraph_type_erased_device_array_view_t const* start_vertices,
                                 size_t max_length,
@@ -382,6 +382,7 @@ struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor {
                                 double q)
     : abstract_functor(),
       handle_(*reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle)->handle_),
+      rng_state_(reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(rng_state)),
       graph_(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)),
       start_vertices_(
         reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
@@ -443,10 +444,6 @@ struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor {
         graph_view.local_vertex_partition_range_last(),
         false);
 
-      //  FIXME: remove once rng_state passed as parameter
-      rng_state_ = reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(
-        new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}});
-
       auto [paths, weights] = cugraph::node2vec_random_walks(
         handle_,
         rng_state_->rng_state_,
@@ -588,6 +585,7 @@ cugraph_error_code_t cugraph_biased_random_walks(
 
 cugraph_error_code_t cugraph_node2vec_random_walks(
   const cugraph_resource_handle_t* handle,
+  cugraph_rng_state_t* rng_state,
   cugraph_graph_t* graph,
   const cugraph_type_erased_device_array_view_t* start_vertices,
   size_t max_length,
@@ -604,7 +602,7 @@ cugraph_error_code_t cugraph_node2vec_random_walks(
                "vertex type of graph and start_vertices must match",
                *error);
 
-  node2vec_random_walks_functor functor(handle, graph, start_vertices, max_length, p, q);
+  node2vec_random_walks_functor functor(handle, rng_state, graph, start_vertices, max_length, p, q);
 
   return cugraph::c_api::run_algorithm(graph, functor, result, error);
 }

From 9d56b5f354dc4e23e650136457c6150e87a28d9a Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 20:09:12 -0800
Subject: [PATCH 04/60] deprecate old API

---
 python/pylibcugraph/pylibcugraph/node2vec.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/pylibcugraph/pylibcugraph/node2vec.pyx b/python/pylibcugraph/pylibcugraph/node2vec.pyx
index 0e0fd73e6c..5729dc6e05 100644
--- a/python/pylibcugraph/pylibcugraph/node2vec.pyx
+++ b/python/pylibcugraph/pylibcugraph/node2vec.pyx
@@ -66,6 +66,8 @@ def node2vec(ResourceHandle resource_handle,
     """
     Computes random walks under node2vec sampling procedure.
 
+    This API is deprecated; call node2vec_random_walks instead.
+
     Parameters
     ----------
     resource_handle : ResourceHandle

From 615837e80d55b407f932372501ec68df1249792d Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 20:12:57 -0800
Subject: [PATCH 05/60] add new API for node2vec random walks

---
 .../pylibcugraph/pylibcugraph/CMakeLists.txt  |   1 +
 python/pylibcugraph/pylibcugraph/__init__.py  |   2 +
 .../pylibcugraph/_cugraph_c/algorithms.pxd    |  19 +-
 .../pylibcugraph/node2vec_random_walks.pyx    | 184 ++++++++++++++++++
 4 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx

diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
index fe7c4b64aa..44963bdc5e 100644
--- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt
+++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
@@ -44,6 +44,7 @@ set(cython_sources
     leiden.pyx
     louvain.pyx
     node2vec.pyx
+    node2vec_random_walks.pyx
     pagerank.pyx
     personalized_pagerank.pyx
     random.pyx
diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py
index 9047144c13..cd5b23db1a 100644
--- a/python/pylibcugraph/pylibcugraph/__init__.py
+++ b/python/pylibcugraph/pylibcugraph/__init__.py
@@ -37,6 +37,8 @@
 
 from pylibcugraph.node2vec import node2vec
 
+from pylibcugraph.node2vec_random_walks import node2vec_random_walks
+
 from pylibcugraph.bfs import bfs
 
 from pylibcugraph.uniform_neighbor_sample import uniform_neighbor_sample
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
index 38781614b2..a8e5bb7fdc 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
@@ -24,6 +24,9 @@ from pylibcugraph._cugraph_c.error cimport (
     cugraph_error_code_t,
     cugraph_error_t,
 )
+from pylibcugraph._cugraph_c.random cimport (
+    cugraph_rng_state_t,
+)
 from pylibcugraph._cugraph_c.array cimport (
     cugraph_type_erased_device_array_view_t,
     cugraph_type_erased_host_array_view_t,
@@ -148,7 +151,7 @@ cdef extern from "cugraph_c/algorithms.h":
             cugraph_random_walk_result_t* result
         )
 
-    # node2vec
+    # node2vec - Deprecated, call node2vec_random_walks instead
     cdef cugraph_error_code_t \
         cugraph_node2vec(
             const cugraph_resource_handle_t* handle,
@@ -377,3 +380,17 @@ cdef extern from "cugraph_c/algorithms.h":
             cugraph_random_walk_result_t** result,
             cugraph_error_t** error
         )
+
+    # node2vec random walks
+    cdef cugraph_error_code_t \
+        cugraph_node2vec_random_walks(
+            const cugraph_resource_handle_t* handle,
+            cugraph_rng_state_t* rng_state,
+            cugraph_graph_t* graph,
+            const cugraph_type_erased_device_array_view_t* start_vertices,
+            size_t max_length,
+            double p,
+            double q,
+            cugraph_random_walk_result_t** result,
+            cugraph_error_t** error
+        )
\ No newline at end of file
diff --git a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
new file mode 100644
index 0000000000..6df1472787
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
@@ -0,0 +1,184 @@
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have cython use python 3 syntax
+# cython: language_level = 3
+
+from libc.stdint cimport uintptr_t
+
+from pylibcugraph._cugraph_c.resource_handle cimport (
+    bool_t,
+    cugraph_resource_handle_t,
+)
+from pylibcugraph._cugraph_c.error cimport (
+    cugraph_error_code_t,
+    cugraph_error_t,
+)
+from pylibcugraph._cugraph_c.array cimport (
+    cugraph_type_erased_device_array_view_t,
+    cugraph_type_erased_device_array_view_create,
+    cugraph_type_erased_device_array_view_free,
+)
+from pylibcugraph._cugraph_c.graph cimport (
+    cugraph_graph_t,
+)
+from pylibcugraph._cugraph_c.algorithms cimport (
+    cugraph_node2vec_random_walks,
+    cugraph_random_walk_result_t,
+    cugraph_random_walk_result_get_paths,
+    cugraph_random_walk_result_get_weights,
+    cugraph_random_walk_result_free,
+)
+from pylibcugraph.resource_handle cimport (
+    ResourceHandle,
+)
+from pylibcugraph.graphs cimport (
+    _GPUGraph,
+)
+from pylibcugraph._cugraph_c.random cimport (
+    cugraph_rng_state_t
+)
+from pylibcugraph.random cimport (
+    CuGraphRandomState
+)
+from pylibcugraph.utils cimport (
+    assert_success,
+    copy_to_cupy_array,
+    assert_CAI_type,
+    get_c_type_from_numpy_type,
+)
+
+
+def node2vec(ResourceHandle resource_handle,
+             _GPUGraph graph,
+             seed_array,
+             size_t max_depth,
+             double p,
+             double q,
+             random_state=None):
+    """
+    Computes random walks under node2vec sampling procedure.
+
+    Parameters
+    ----------
+    resource_handle : ResourceHandle
+        Handle to the underlying device resources needed for referencing data
+        and running algorithms.
+
+    graph : SGGraph
+        The input graph.
+
+    seed_array: device array type
+        Device array containing the pointer to the array of seed vertices.
+
+    max_depth : size_t
+        Maximum number of vertices in generated path
+
+    p : double
+        The return factor p represents the likelihood of backtracking to a node
+        in the walk. A higher value (> max(q, 1)) makes it less likely to sample
+        a previously visited node, while a lower value (< min(q, 1)) would make it
+        more likely to backtrack, making the walk more "local".
+
+    q : double
+        The in-out factor q represents the likelihood of visiting nodes closer or
+        further from the outgoing node. If q > 1, the random walk is likelier to
+        visit nodes closer to the outgoing node. If q < 1, the random walk is
+        likelier to visit nodes further from the outgoing node.
+    
+    random_state: int (Optional)
+        Random state to use when generating samples.  Optional argument,
+        defaults to a hash of process id, time, and hostname.
+        (See pylibcugraph.random.CuGraphRandomState)
+
+    Returns
+    -------
+    A tuple of device arrays, where the first item in the tuple is a device
+    array containing the compressed paths, the second item is a device
+    array containing the corresponding weights for each edge traversed in
+    each path.
+
+    Examples
+    --------
+    >>> import pylibcugraph, cupy, numpy
+    >>> srcs = cupy.asarray([0, 1, 2], dtype=numpy.int32)
+    >>> dsts = cupy.asarray([1, 2, 3], dtype=numpy.int32)
+    >>> seeds = cupy.asarray([0, 0, 1], dtype=numpy.int32)
+    >>> weights = cupy.asarray([1.0, 1.0, 1.0], dtype=numpy.float32)
+    >>> resource_handle = pylibcugraph.ResourceHandle()
+    >>> graph_props = pylibcugraph.GraphProperties(
+    ...     is_symmetric=False, is_multigraph=False)
+    >>> G = pylibcugraph.SGGraph(
+    ...     resource_handle, graph_props, srcs, dsts, weight_array=weights,
+    ...     store_transposed=False, renumber=False, do_expensive_check=False)
+    >>> (paths, weights, sizes) = pylibcugraph.node2vec(
+    ...                             resource_handle, G, seeds, 3, 1.0, 1.0)
+
+    """
+
+    # FIXME: import these modules here for now until a better pattern can be
+    # used for optional imports (perhaps 'import_optional()' from cugraph), or
+    # these are made hard dependencies.
+    try:
+        import cupy
+    except ModuleNotFoundError:
+        raise RuntimeError("node2vec requires the cupy package, which could not "
+                           "be imported")
+    assert_CAI_type(seed_array, "seed_array")
+
+    cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
+        resource_handle.c_resource_handle_ptr
+    cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr
+
+    cdef cugraph_random_walk_result_t* result_ptr
+    cdef cugraph_error_code_t error_code
+    cdef cugraph_error_t* error_ptr
+
+    cdef uintptr_t cai_seed_ptr = \
+        seed_array.__cuda_array_interface__["data"][0]
+    cdef cugraph_type_erased_device_array_view_t* seed_view_ptr = \
+        cugraph_type_erased_device_array_view_create(
+            <void*>cai_seed_ptr,
+            len(seed_array),
+            get_c_type_from_numpy_type(seed_array.dtype))
+
+    cg_rng_state = CuGraphRandomState(resource_handle, random_state)
+    
+    cdef cugraph_rng_state_t* rng_state_ptr = \
+        cg_rng_state.rng_state_ptr
+
+    error_code = cugraph_node2vec_random_walks(c_resource_handle_ptr,
+                                               rng_state_ptr,
+                                               c_graph_ptr,
+                                               seed_view_ptr,
+                                               max_depth,
+                                               p,
+                                               q,
+                                               &result_ptr,
+                                               &error_ptr)
+    assert_success(error_code, error_ptr, "cugraph_node2vec_random_walks")
+
+    # Extract individual device array pointers from result and copy to cupy
+    # arrays for returning.
+    cdef cugraph_type_erased_device_array_view_t* paths_ptr = \
+        cugraph_random_walk_result_get_paths(result_ptr)
+    cdef cugraph_type_erased_device_array_view_t* weights_ptr = \
+        cugraph_random_walk_result_get_weights(result_ptr)
+
+    cupy_paths = copy_to_cupy_array(c_resource_handle_ptr, paths_ptr)
+    cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr)
+
+    cugraph_random_walk_result_free(result_ptr)
+    cugraph_type_erased_device_array_view_free(seed_view_ptr)
+
+    return (cupy_paths, cupy_weights)

From 21a76bb4921197896f4484bcbca34d9b89ea15bb Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 20:20:37 -0800
Subject: [PATCH 06/60] add mg node2vec random walks to the python API

---
 python/cugraph/cugraph/dask/__init__.py       |   1 +
 .../dask/sampling/node2vec_random_walks.py    | 218 ++++++++++++++++++
 2 files changed, 219 insertions(+)
 create mode 100644 python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py

diff --git a/python/cugraph/cugraph/dask/__init__.py b/python/cugraph/cugraph/dask/__init__.py
index b1588008bc..b8753fc461 100644
--- a/python/cugraph/cugraph/dask/__init__.py
+++ b/python/cugraph/cugraph/dask/__init__.py
@@ -28,6 +28,7 @@
 from .components.connectivity import weakly_connected_components
 from .sampling.uniform_neighbor_sample import uniform_neighbor_sample
 from .sampling.random_walks import random_walks
+from .sampling.node2vec_random_walks import node2vec_random_walks
 from .centrality.eigenvector_centrality import eigenvector_centrality
 from .cores.core_number import core_number
 from .centrality.betweenness_centrality import betweenness_centrality
diff --git a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
new file mode 100644
index 0000000000..d70601841c
--- /dev/null
+++ b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
@@ -0,0 +1,218 @@
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dask.distributed import wait, default_client
+import dask_cudf
+import cudf
+import operator as op
+from cugraph.dask.common.part_utils import (
+    persist_dask_df_equal_parts_per_worker,
+)
+
+from pylibcugraph import ResourceHandle
+
+from pylibcugraph import (
+    node2vec_random_walks as pylibcugraph_node2vec_random_walks,
+)
+
+from cugraph.dask.comms import comms as Comms
+
+
+def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
+    """
+    Creates cudf Series from cupy arrays from pylibcugraph wrapper
+    """
+
+    if is_vertex_paths and len(cp_paths) > 0:
+        if number_map.implementation.numbered:
+            df_ = cudf.DataFrame()
+            df_["vertex_paths"] = cp_paths
+            df_ = number_map.unrenumber(
+                df_, "vertex_paths", preserve_order=True
+            ).compute()
+            vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
+
+            return vertex_paths
+
+    return cudf.Series(cp_paths)
+
+
+def _call_plc_node2vec_random_walks(sID, mg_graph_x, st_x, max_depth, compress_result, p, q):
+
+    return pylibcugraph_node2vec_random_walks(
+        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+        graph=mg_graph_x,
+        seed_array=st_x,
+        max_depth=max_depth,
+        compress_result=compress_result,
+        p=p,
+        q=q
+    )
+
+
+def node2vec_random_walks(
+    input_graph,
+    start_vertices=None,
+    max_depth=None,
+    compress_result=True,
+    p=1.0,
+    q=1.0
+):
+    """
+    Computes random walks for each node in 'start_vertices', under the
+    node2vec_random_walks sampling framework.
+
+    parameters
+    ----------
+    input_graph : cuGraph.Graph
+        The graph can be either directed or undirected.
+
+    start_vertices: int or list or cudf.Series or cudf.DataFrame
+        A single node or a list or a cudf.Series of nodes from which to run
+        the random walks. In case of multi-column vertices it should be
+        a cudf.DataFrame. Only supports int32 currently.
+
+    max_depth: int, optional (default=1)
+        The maximum depth of the random walks. If not specified, the maximum
+        depth is set to 1.
+
+    compress_result: bool, optional (default=True)
+        If True, coalesced paths are returned with a sizes array with offsets.
+        Otherwise padded paths are returned with an empty sizes array.
+
+    p: float, optional (default=1.0, [0 < p])
+        Return factor, which represents the likelihood of backtracking to
+        a previous node in the walk. A higher value makes it less likely to
+        sample a previously visited node, while a lower value makes it more
+        likely to backtrack, making the walk "local". A positive float.
+
+    q: float, optional (default=1.0, [0 < q])
+        In-out factor, which represents the likelihood of visiting nodes
+        closer or further from the outgoing node. If q > 1, the random walk
+        is likelier to visit nodes closer to the outgoing node. If q < 1, the
+        random walk is likelier to visit nodes further from the outgoing node.
+        A positive float.
+
+    Returns
+    -------
+    vertex_paths : dask_cudf.Series or dask_cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: dask_cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    sizes : dask_cudf.Series
+        The path size or sizes in case of coalesced paths.
+    """
+    client = default_client()
+
+    if (not isinstance(max_depth, int)) or (max_depth < 1):
+        raise ValueError(
+            f"'max_depth' must be a positive integer, " f"got: {max_depth}"
+        )
+    if not isinstance(compress_result, bool):
+        raise ValueError(
+            f"'compress_result' must be a bool, " f"got: {compress_result}"
+        )
+    if (not isinstance(p, float)) or (p <= 0.0):
+        raise ValueError(f"'p' must be a positive float, got: {p}")
+    if (not isinstance(q, float)) or (q <= 0.0):
+        raise ValueError(f"'q' must be a positive float, got: {q}")
+
+
+
+    if isinstance(start_vertices, int):
+        start_vertices = [start_vertices]
+
+    if isinstance(start_vertices, list):
+        start_vertices = cudf.Series(start_vertices)
+
+    # start_vertices uses "external" vertex IDs, but if the graph has been
+    # renumbered, the start vertex IDs must also be renumbered.
+    if input_graph.renumbered:
+        # FIXME: This should match start_vertices type to the renumbered df type
+        # but verify that. If not retrieve the type and cast it when creating
+        # the dask_cudf from a cudf
+        start_vertices = input_graph.lookup_internal_vertex_id(start_vertices).compute()
+        start_vertices_type = input_graph.edgelist.edgelist_df.dtypes[0]
+    else:
+        # FIXME: Get the 'src' column names instead and retrieve the type
+        start_vertices_type = input_graph.input_df.dtypes.iloc[0]
+    start_vertices = dask_cudf.from_cudf(
+        start_vertices, npartitions=min(input_graph._npartitions, len(start_vertices))
+    )
+    start_vertices = start_vertices.astype(start_vertices_type)
+    start_vertices = persist_dask_df_equal_parts_per_worker(
+        start_vertices, client, return_type="dict"
+    )
+
+    #print("start vertex_type = ", start_vertices_type)
+    #print("edgelist type = ", input_graph.edgelist.edgelist_df)
+
+    result = [
+        client.submit(
+            _call_plc_node2vec_random_walks,
+            Comms.get_session_id(),
+            input_graph._plc_graph[w],
+            start_v[0] if start_v else cudf.Series(dtype=start_vertices_type),
+            max_depth,
+            compress_result=compress_result,
+            p=p,
+            q=q,
+            workers=[w],
+            allow_other_workers=False,
+        )
+        for w, start_v in start_vertices.items()
+    ]
+
+    wait(result)
+
+    result_vertex_paths = [client.submit(op.getitem, f, 0) for f in result]
+    result_edge_wgt_paths = [client.submit(op.getitem, f, 1) for f in result]
+    result_sizes = [client.submit(op.getitem, f, 2) for f in result]
+
+    cudf_vertex_paths = [
+        client.submit(convert_to_cudf, cp_vertex_paths, input_graph.renumber_map, True)
+        for cp_vertex_paths in result_vertex_paths
+    ]
+
+    cudf_edge_wgt_paths = [
+        client.submit(convert_to_cudf, cp_edge_wgt_paths)
+        for cp_edge_wgt_paths in result_edge_wgt_paths
+    ]
+
+    cudf_sizes = [
+        client.submit(convert_to_cudf, cp_sizes)
+        for cp_sizes in result_sizes
+    ]
+
+    wait([cudf_vertex_paths, cudf_edge_wgt_paths, cudf_sizes])
+
+    
+    ddf_vertex_paths = dask_cudf.from_delayed(cudf_vertex_paths).persist()
+    ddf_edge_wgt_paths = dask_cudf.from_delayed(cudf_edge_wgt_paths).persist()
+    ddf_sizes = dask_cudf.from_delayed(cudf_sizes).persist()
+    #wait([ddf_vertex_paths, ddf_edge_wgt_paths])
+
+    # Wait until the inactive futures are released
+    wait(
+        [
+            (r.release(), c_v.release(), c_e.release())
+            for r, c_v, c_e, c_s in zip(result, cudf_vertex_paths, cudf_edge_wgt_paths, cudf_sizes)
+        ]
+    )
+
+    return ddf_vertex_paths, ddf_edge_wgt_paths, ddf_sizes
+    

From 86a13d3d0b058af28816694f21add0634696838f Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 20:29:07 -0800
Subject: [PATCH 07/60] update docstrings

---
 python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
index 6df1472787..fc391a5fec 100644
--- a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
+++ b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
@@ -121,8 +121,8 @@ def node2vec(ResourceHandle resource_handle,
     >>> G = pylibcugraph.SGGraph(
     ...     resource_handle, graph_props, srcs, dsts, weight_array=weights,
     ...     store_transposed=False, renumber=False, do_expensive_check=False)
-    >>> (paths, weights, sizes) = pylibcugraph.node2vec(
-    ...                             resource_handle, G, seeds, 3, 1.0, 1.0)
+    >>> (paths, weights) = pylibcugraph.node2vec_random_walks(
+    ...                      resource_handle, G, seeds, 3, 1.0, 1.0)
 
     """
 

From 4c8744f2716a3681b4fe686418be0b08ccd35ea0 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 20:40:08 -0800
Subject: [PATCH 08/60] enable mg node2vec_random walks

---
 cpp/src/c_api/random_walks.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp
index 7d883df9dd..e21090f6d0 100644
--- a/cpp/src/c_api/random_walks.cpp
+++ b/cpp/src/c_api/random_walks.cpp
@@ -404,8 +404,6 @@ struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor {
     // FIXME: Think about how to handle SG vice MG
     if constexpr (!cugraph::is_candidate<vertex_t, edge_t, weight_t>::value) {
       unsupported();
-    } else if constexpr (multi_gpu) {
-      unsupported();
     } else {
       // random walks expects store_transposed == false
       if constexpr (store_transposed) {

From 4da1c7ed3bdd11bf915a37446c7336793c3c419b Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 20:42:22 -0800
Subject: [PATCH 09/60] update argument list in function call

---
 .../dask/sampling/node2vec_random_walks.py    | 48 ++++++-------------
 1 file changed, 14 insertions(+), 34 deletions(-)

diff --git a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
index d70601841c..18171eda62 100644
--- a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
@@ -48,30 +48,31 @@ def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
     return cudf.Series(cp_paths)
 
 
-def _call_plc_node2vec_random_walks(sID, mg_graph_x, st_x, max_depth, compress_result, p, q):
+def _call_plc_node2vec_random_walks(sID, mg_graph_x, st_x, max_depth, p, q, random_state):
 
     return pylibcugraph_node2vec_random_walks(
         resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
         graph=mg_graph_x,
         seed_array=st_x,
         max_depth=max_depth,
-        compress_result=compress_result,
         p=p,
-        q=q
+        q=q,
+        random_state=random_state
     )
 
 
+# FIXME: Add type annotations
 def node2vec_random_walks(
     input_graph,
     start_vertices=None,
     max_depth=None,
-    compress_result=True,
     p=1.0,
-    q=1.0
+    q=1.0,
+    random_state=None
 ):
     """
     Computes random walks for each node in 'start_vertices', under the
-    node2vec_random_walks sampling framework.
+    node2vec sampling framework.
 
     parameters
     ----------
@@ -87,10 +88,6 @@ def node2vec_random_walks(
         The maximum depth of the random walks. If not specified, the maximum
         depth is set to 1.
 
-    compress_result: bool, optional (default=True)
-        If True, coalesced paths are returned with a sizes array with offsets.
-        Otherwise padded paths are returned with an empty sizes array.
-
     p: float, optional (default=1.0, [0 < p])
         Return factor, which represents the likelihood of backtracking to
         a previous node in the walk. A higher value makes it less likely to
@@ -103,6 +100,9 @@ def node2vec_random_walks(
         is likelier to visit nodes closer to the outgoing node. If q < 1, the
         random walk is likelier to visit nodes further from the outgoing node.
         A positive float.
+    
+    random_state: int, optional
+        Random seed to use when making sampling calls.
 
     Returns
     -------
@@ -112,9 +112,6 @@ def node2vec_random_walks(
     edge_weight_paths: dask_cudf.Series
         Series containing the edge weights of edges represented by the
         returned vertex_paths
-
-    sizes : dask_cudf.Series
-        The path size or sizes in case of coalesced paths.
     """
     client = default_client()
 
@@ -122,10 +119,6 @@ def node2vec_random_walks(
         raise ValueError(
             f"'max_depth' must be a positive integer, " f"got: {max_depth}"
         )
-    if not isinstance(compress_result, bool):
-        raise ValueError(
-            f"'compress_result' must be a bool, " f"got: {compress_result}"
-        )
     if (not isinstance(p, float)) or (p <= 0.0):
         raise ValueError(f"'p' must be a positive float, got: {p}")
     if (not isinstance(q, float)) or (q <= 0.0):
@@ -158,9 +151,6 @@ def node2vec_random_walks(
         start_vertices, client, return_type="dict"
     )
 
-    #print("start vertex_type = ", start_vertices_type)
-    #print("edgelist type = ", input_graph.edgelist.edgelist_df)
-
     result = [
         client.submit(
             _call_plc_node2vec_random_walks,
@@ -168,9 +158,9 @@ def node2vec_random_walks(
             input_graph._plc_graph[w],
             start_v[0] if start_v else cudf.Series(dtype=start_vertices_type),
             max_depth,
-            compress_result=compress_result,
             p=p,
             q=q,
+            random_state=random_state,
             workers=[w],
             allow_other_workers=False,
         )
@@ -181,7 +171,6 @@ def node2vec_random_walks(
 
     result_vertex_paths = [client.submit(op.getitem, f, 0) for f in result]
     result_edge_wgt_paths = [client.submit(op.getitem, f, 1) for f in result]
-    result_sizes = [client.submit(op.getitem, f, 2) for f in result]
 
     cudf_vertex_paths = [
         client.submit(convert_to_cudf, cp_vertex_paths, input_graph.renumber_map, True)
@@ -193,26 +182,17 @@ def node2vec_random_walks(
         for cp_edge_wgt_paths in result_edge_wgt_paths
     ]
 
-    cudf_sizes = [
-        client.submit(convert_to_cudf, cp_sizes)
-        for cp_sizes in result_sizes
-    ]
-
-    wait([cudf_vertex_paths, cudf_edge_wgt_paths, cudf_sizes])
+    wait([cudf_vertex_paths, cudf_edge_wgt_paths])
 
-    
     ddf_vertex_paths = dask_cudf.from_delayed(cudf_vertex_paths).persist()
     ddf_edge_wgt_paths = dask_cudf.from_delayed(cudf_edge_wgt_paths).persist()
-    ddf_sizes = dask_cudf.from_delayed(cudf_sizes).persist()
-    #wait([ddf_vertex_paths, ddf_edge_wgt_paths])
 
-    # Wait until the inactive futures are released
     wait(
         [
             (r.release(), c_v.release(), c_e.release())
-            for r, c_v, c_e, c_s in zip(result, cudf_vertex_paths, cudf_edge_wgt_paths, cudf_sizes)
+            for r, c_v, c_e in zip(result, cudf_vertex_paths, cudf_edge_wgt_paths)
         ]
     )
 
-    return ddf_vertex_paths, ddf_edge_wgt_paths, ddf_sizes
+    return ddf_vertex_paths, ddf_edge_wgt_paths
     

From 6984645690f8ee7ddfa4620bf2c8b3f75d90d8ee Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 27 Dec 2024 21:26:33 -0800
Subject: [PATCH 10/60] support optional weights

---
 .../pylibcugraph/node2vec_random_walks.pyx            | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
index fc391a5fec..d1c4760d1f 100644
--- a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
+++ b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
@@ -59,7 +59,7 @@ from pylibcugraph.utils cimport (
 )
 
 
-def node2vec(ResourceHandle resource_handle,
+def node2vec_random_walks(ResourceHandle resource_handle,
              _GPUGraph graph,
              seed_array,
              size_t max_depth,
@@ -172,11 +172,14 @@ def node2vec(ResourceHandle resource_handle,
     # arrays for returning.
     cdef cugraph_type_erased_device_array_view_t* paths_ptr = \
         cugraph_random_walk_result_get_paths(result_ptr)
-    cdef cugraph_type_erased_device_array_view_t* weights_ptr = \
-        cugraph_random_walk_result_get_weights(result_ptr)
+
+    if graph.weights_view_ptr is NULL and graph.weights_view_ptr_ptr is NULL:
+        cupy_weights = None
+    else:
+        weights_ptr = cugraph_random_walk_result_get_weights(result_ptr)
+        cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr)
 
     cupy_paths = copy_to_cupy_array(c_resource_handle_ptr, paths_ptr)
-    cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr)
 
     cugraph_random_walk_result_free(result_ptr)
     cugraph_type_erased_device_array_view_free(seed_view_ptr)

From d04588aeb7962c6ca05ce54fddff04c59a9ece51 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 30 Dec 2024 19:53:05 -0800
Subject: [PATCH 11/60] update docstring and deprecate arguments

---
 .../cugraph/cugraph/sampling/random_walks.py  | 44 +++++++++++++++++--
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/python/cugraph/cugraph/sampling/random_walks.py b/python/cugraph/cugraph/sampling/random_walks.py
index 1c56dbbe32..d4f8c3c53d 100644
--- a/python/cugraph/cugraph/sampling/random_walks.py
+++ b/python/cugraph/cugraph/sampling/random_walks.py
@@ -65,7 +65,7 @@ def random_walks(
     with 0.0s (when 'legacy_result_type' is 'True'). If 'legacy_result_type'
     is 'False', 'random_walks' returns padded results (vertex_paths,
     edge_weight_paths) but instead of 'sizes = None', returns the 'max_path_lengths'.
-    When 'legacy_result_type' is 'False', the arhument 'use_padding' is ignored.
+    When 'legacy_result_type' is 'False', the argument 'use_padding' is ignored.
 
     parameters
     ----------
@@ -81,6 +81,8 @@ def random_walks(
         Type of random walks: 'uniform', 'biased', 'node2vec'.
         Only 'uniform' random walks is currently supported
 
+        Deprecated
+
     start_vertices : int or list or cudf.Series or cudf.DataFrame
         A single node or a list or a cudf.Series of nodes from which to run
         the random walks. In case of multi-column vertices it should be
@@ -126,13 +128,39 @@ def random_walks(
 
     """
 
-    if legacy_result_type:
+    warning_msg = (
+            "random_walks is deprecated and will be removed "
+            "in the next release in favor of uniform_random_walks"
+        )
+    warnings.warn(warning_msg, FutureWarning)
+
+    # FIXME: Coalesced path results have been deprecated and should no longer be
+    # supported in 25.02.
+    # Context for legacy_result_type: The initial implementation of random_walks
+    # returned results where the vertex and weight path are proportional to the
+    # number of vertices instead of the number of edges hence the flag
+    # 'legacy_result_type' was created. This flag should be removed in favor of
+    # returning result paths proportional to the number of edges. Furthermore,
+    # Coalesced path results should also be removed in favor of always returning
+    # padded results. The flags 'legacy_result_type' and 'use_padding' should be
+    # removed. 
+
+    if legacy_result_type or use_padding is False:
         warning_msg = (
             "Coalesced path results, returned when setting legacy_result_type=True, "
             "is deprecated and will no longer be supported in the next releases. "
             "only padded paths will be returned instead"
         )
         warnings.warn(warning_msg, PendingDeprecationWarning)
+    
+    if random_walks_type != "uniform":
+        warning_msg = (
+            "random_walks_type is deprecated and will be removed "
+            "in the next release. If random_walks_type == 'biased' or 'node2vec, "
+            "call 'biased_random_walks' or 'node2vec_random_walks'."
+        )
+    warnings.warn(warning_msg, FutureWarning)
+
 
     if max_depth is None:
         raise TypeError("must specify a 'max_depth'")
@@ -142,6 +170,9 @@ def random_walks(
     # data struct like a dictionary, etc.). The 2nd value is ignored here,
     # which is typically named isNx and used to convert the return type.
     # Consider a different return type if Nx types are passed in.
+    # The new API for random walk should instead always return the triple
+    # (vertex_paths, edge_wgt_paths, max_path_length)
+    
     G, _ = ensure_cugraph_obj_for_nx(G)
 
     if isinstance(start_vertices, int):
@@ -191,7 +222,7 @@ def random_walks(
         )
         warnings.warn(warning_msg, PendingDeprecationWarning)
 
-        # Drop the last vertex and and edge weight from each vertex and edge weight
+        # Drop the last vertex and edge weight from each vertex and edge weight
         # paths.
         vertex_paths = vertex_paths.drop(
             index=vertex_paths[max_depth :: max_depth + 1].index
@@ -202,11 +233,16 @@ def random_walks(
         ).reset_index(drop=True)
 
         if use_padding:
+            # When padding, the 'sizes' array is not necessary because
+            # 'vertex_paths' and 'edge_wgt_paths' contain all information
+            # because of the padding factor.
             sizes = None
             # FIXME: Is it necessary to slice it with 'edge_wgt_paths_sz'?
             return vertex_paths, edge_wgt_paths, sizes
 
         # If 'use_padding' is False, compute the sizes of the unpadded results
+        # since the padded value (-1) will be removed which will make it difficult
+        # to identify the end and the beginning of a new path.
 
         sizes = (
             vertex_paths.apply(lambda x: 1 if x != -1 else 0)
@@ -251,7 +287,7 @@ def rw_path(
     Returns
     -------
     path_data : cudf.DataFrame
-        Dataframe containing vetex path offsets, edge weight offsets and
+        Dataframe containing vertex path offsets, edge weight offsets and
         edge weight sizes for each path.
     """
 

From cb6a29414b79ea413debb4d02f1e14b599da40f0 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 30 Dec 2024 19:54:47 -0800
Subject: [PATCH 12/60] add new API for uniform_random_walks

---
 .../cugraph/sampling/uniform_random_walks.py  | 166 ++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 python/cugraph/cugraph/sampling/uniform_random_walks.py

diff --git a/python/cugraph/cugraph/sampling/uniform_random_walks.py b/python/cugraph/cugraph/sampling/uniform_random_walks.py
new file mode 100644
index 0000000000..72484240db
--- /dev/null
+++ b/python/cugraph/cugraph/sampling/uniform_random_walks.py
@@ -0,0 +1,166 @@
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cudf
+import cupy as cp
+from pylibcugraph import ResourceHandle
+from pylibcugraph import (
+    uniform_random_walks as pylibcugraph_uniform_random_walks,
+)
+
+from cugraph.structure import Graph
+
+import warnings
+from typing import Union, Tuple
+
+
+def uniform_random_walks(
+    G: Graph,
+    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame] = None,
+    max_depth: int = None,
+) -> Tuple[cudf.Series, cudf.Series, Union[None, int, cudf.Series]]:
+    """
+    Compute uniform random walks for each node in 'start_vertices'.
+    Vertices with no outgoing edges will be padded with -1 and the corresponding
+    edge weights with 0.0.
+
+    parameters
+    ----------
+    G : cuGraph.Graph
+        The graph can be either directed or undirected.
+
+    start_vertices : int or list or cudf.Series or cudf.DataFrame
+        A single node or a list or a cudf.Series of nodes from which to run
+        the random walks. In case of multi-column vertices it should be
+        a cudf.DataFrame
+
+    max_depth : int
+        The maximum depth of the random walks
+
+        The max depth is relative to the number of edges hence the vertex_paths size
+        is max_depth + 1. For instance, a 'max_depth' of 2 with only one seed will
+        result in a vertex_path of size 3.
+
+
+    Returns
+    -------
+    vertex_paths : cudf.Series or cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    and
+
+    max_path_length : int
+        The maximum path length.
+
+    Examples
+    --------
+    >>> from cugraph.datasets import karate
+    >>> M = karate.get_edgelist(download=True)
+    >>> G = karate.get_graph()
+    >>> start_vertices = G.nodes()[:4]
+    >>> _, _, _ = cugraph.uniform_random_walks(G, start_vertices, 3)
+
+    """
+
+    if max_depth is None:
+        raise TypeError("must specify a 'max_depth'")
+
+    if isinstance(start_vertices, int):
+        start_vertices = [start_vertices]
+
+    if isinstance(start_vertices, list):
+        # Ensure the 'start_vertices' have the same dtype as the edge list.
+        # Failing to do that may produce erroneous results.
+        vertex_dtype = G.edgelist.edgelist_df.dtypes.iloc[0]
+        start_vertices = cudf.Series(start_vertices, dtype=vertex_dtype)
+
+    if G.renumbered is True:
+        if isinstance(start_vertices, cudf.DataFrame):
+            start_vertices = G.lookup_internal_vertex_id(
+                start_vertices, start_vertices.columns
+            )
+        else:
+            start_vertices = G.lookup_internal_vertex_id(start_vertices)
+
+    vertex_paths, edge_wgt_paths, max_path_length = pylibcugraph_uniform_random_walks(
+        resource_handle=ResourceHandle(),
+        input_graph=G._plc_graph,
+        start_vertices=start_vertices,
+        max_length=max_depth,
+    )
+
+    vertex_paths = cudf.Series(vertex_paths)
+
+    if G.renumbered:
+        df_ = cudf.DataFrame()
+        df_["vertex_paths"] = vertex_paths
+        df_ = G.unrenumber(df_, "vertex_paths", preserve_order=True)
+        vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
+
+    edge_wgt_paths = cudf.Series(edge_wgt_paths)
+
+    return (
+        vertex_paths,
+        edge_wgt_paths,
+        max_path_length,
+    )
+
+
+def rw_path(
+    num_paths: int, sizes: cudf.Series
+) -> Tuple[cudf.Series, cudf.Series, cudf.Series]:
+    """
+    Retrieve more information on the obtained paths in case use_padding
+    is False.
+
+    parameters
+    ----------
+    num_paths: int
+        Number of paths in the random walk output.
+
+    sizes: cudf.Series
+        Path size returned in random walk output.
+
+    Returns
+    -------
+    path_data : cudf.DataFrame
+        Dataframe containing vertex path offsets, edge weight offsets and
+        edge weight sizes for each path.
+    """
+
+    vertex_offsets = cudf.Series(0, dtype=sizes.dtype)
+    vertex_offsets = cudf.concat(
+        [vertex_offsets, sizes.cumsum()[:-1]], ignore_index=True
+    )
+    weight_sizes = sizes - 1
+
+    weight_offsets = cudf.Series(0, dtype=sizes.dtype)
+    num_edges = vertex_offsets.diff()[1:] - 1
+
+    weight_offsets = cudf.concat(
+        [weight_offsets, num_edges.cumsum()], ignore_index=True
+    )
+    # FIXME: CUDF bug. concatenating two series of type int32 but get a CUDF of
+    # type 'int64' have to cast the results
+    weight_offsets = weight_offsets.astype(sizes.dtype)
+
+    path_data = cudf.DataFrame()
+    path_data["vertex_offsets"] = vertex_offsets
+    path_data["weight_sizes"] = weight_sizes
+    path_data["weight_offsets"] = weight_offsets
+
+    return path_data[:num_paths]

From 793602628d21e4362c827b100ae85cc0f566bbe7 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 30 Dec 2024 20:02:30 -0800
Subject: [PATCH 13/60] deprecate method

---
 python/cugraph/cugraph/sampling/__init__.py     | 1 +
 python/cugraph/cugraph/sampling/random_walks.py | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/python/cugraph/cugraph/sampling/__init__.py b/python/cugraph/cugraph/sampling/__init__.py
index de5c43bdd0..3af0b84a6e 100644
--- a/python/cugraph/cugraph/sampling/__init__.py
+++ b/python/cugraph/cugraph/sampling/__init__.py
@@ -12,5 +12,6 @@
 # limitations under the License.
 
 from cugraph.sampling.random_walks import random_walks, rw_path
+from cugraph.sampling.uniform_random_walks import uniform_random_walks
 from cugraph.sampling.node2vec import node2vec
 from cugraph.sampling.uniform_neighbor_sample import uniform_neighbor_sample
diff --git a/python/cugraph/cugraph/sampling/random_walks.py b/python/cugraph/cugraph/sampling/random_walks.py
index d4f8c3c53d..f0177d92c4 100644
--- a/python/cugraph/cugraph/sampling/random_walks.py
+++ b/python/cugraph/cugraph/sampling/random_walks.py
@@ -291,6 +291,13 @@ def rw_path(
         edge weight sizes for each path.
     """
 
+    warning_msg = (
+            "This method is deprecated in favor of always returning "
+            "padded results."
+        )
+
+    warnings.warn(warning_msg, PendingDeprecationWarning)
+
     vertex_offsets = cudf.Series(0, dtype=sizes.dtype)
     vertex_offsets = cudf.concat(
         [vertex_offsets, sizes.cumsum()[:-1]], ignore_index=True

From 7a5056fcd4c5f5da3a9b356658ec9591996692e9 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 30 Dec 2024 20:03:25 -0800
Subject: [PATCH 14/60] update copyrights

---
 python/cugraph/cugraph/sampling/__init__.py             | 2 +-
 python/cugraph/cugraph/sampling/uniform_random_walks.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cugraph/cugraph/sampling/__init__.py b/python/cugraph/cugraph/sampling/__init__.py
index 3af0b84a6e..723457d62c 100644
--- a/python/cugraph/cugraph/sampling/__init__.py
+++ b/python/cugraph/cugraph/sampling/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/sampling/uniform_random_walks.py b/python/cugraph/cugraph/sampling/uniform_random_walks.py
index 72484240db..36ea7eeeec 100644
--- a/python/cugraph/cugraph/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/sampling/uniform_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at

From e2e4694f2c6ec62e63bb141738107b62f5e46f53 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 30 Dec 2024 20:04:50 -0800
Subject: [PATCH 15/60] add uniform random walks

---
 python/cugraph/cugraph/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py
index ada1fec74c..3f960433ca 100644
--- a/python/cugraph/cugraph/__init__.py
+++ b/python/cugraph/cugraph/__init__.py
@@ -108,6 +108,7 @@
 
 from cugraph.sampling import (
     random_walks,
+    uniform_random_walks,
     rw_path,
     node2vec,
     uniform_neighbor_sample,

From 877265b1f258dd0e39ecfe165339ab56d77b0c09 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 30 Dec 2024 20:24:03 -0800
Subject: [PATCH 16/60] add new API for node2vec random walks

---
 .../cugraph/sampling/node2vec_random_walks.py | 160 ++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 python/cugraph/cugraph/sampling/node2vec_random_walks.py

diff --git a/python/cugraph/cugraph/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
new file mode 100644
index 0000000000..05c256c63a
--- /dev/null
+++ b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pylibcugraph import (
+    ResourceHandle,
+    node2vec as pylibcugraph_node2vec_random_walks,
+)
+import warnings
+
+import cudf
+
+
+# FIXME: Move this function to the utility module so that it can be
+# shared by other algos
+def ensure_valid_dtype(input_graph, start_vertices):
+    vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
+    if isinstance(start_vertices, cudf.Series):
+        start_vertices_dtype = start_vertices.dtype
+    else:
+        start_vertices_dtype = start_vertices.dtypes.iloc[0]
+
+    if start_vertices_dtype != vertex_dtype:
+        warning_msg = (
+            "Node2vec requires 'start_vertices' to match the graph's "
+            f"'vertex' type. input graph's vertex type is: {vertex_dtype} and got "
+            f"'start_vertices' of type: {start_vertices_dtype}."
+        )
+        warnings.warn(warning_msg, UserWarning)
+        start_vertices = start_vertices.astype(vertex_dtype)
+
+    return start_vertices
+
+
+def node2vec(G, start_vertices, max_depth=1, p=1.0, q=1.0, random_state=None):
+    """
+    Computes random walks for each node in 'start_vertices', under the
+    node2vec sampling framework.
+
+    References
+    ----------
+
+    A Grover, J Leskovec: node2vec: Scalable Feature Learning for Networks,
+    Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge
+    Discovery and Data Mining, https://arxiv.org/abs/1607.00653
+
+    Parameters
+    ----------
+    G : cuGraph.Graph
+        The graph can be either directed or undirected.
+        Weights in the graph are ignored.
+
+    start_vertices: int or list or cudf.Series or cudf.DataFrame
+        A single node or a list or a cudf.Series of nodes from which to run
+        the random walks. In case of multi-column vertices it should be
+        a cudf.DataFrame. Only supports int32 currently.
+
+    max_depth: int, optional (default=1)
+        The maximum depth of the random walks. If not specified, the maximum
+        depth is set to 1.
+
+    p: float, optional (default=1.0, [0 < p])
+        Return factor, which represents the likelihood of backtracking to
+        a previous node in the walk. A higher value makes it less likely to
+        sample a previously visited node, while a lower value makes it more
+        likely to backtrack, making the walk "local". A positive float.
+
+    q: float, optional (default=1.0, [0 < q])
+        In-out factor, which represents the likelihood of visiting nodes
+        closer or further from the outgoing node. If q > 1, the random walk
+        is likelier to visit nodes closer to the outgoing node. If q < 1, the
+        random walk is likelier to visit nodes further from the outgoing node.
+        A positive float.
+    
+    random_state: int, optional
+        Random seed to use when making sampling calls.
+
+    Returns
+    -------
+    vertex_paths : cudf.Series or cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    and
+
+    max_path_length : int
+        The maximum path length.
+
+    Examples
+    --------
+    >>> from cugraph.datasets import karate
+    >>> G = karate.get_graph(download=True)
+    >>> start_vertices = cudf.Series([0, 2], dtype=np.int32)
+    >>> paths, weights, max_length = cugraph.node2vec_random_walks(G,
+    ...                                               start_vertices, 3,
+    ...                                               0.8, 0.5)
+
+    """
+    if (not isinstance(max_depth, int)) or (max_depth < 1):
+        raise ValueError(
+            f"'max_depth' must be a positive integer, " f"got: {max_depth}"
+        )
+    if (not isinstance(p, float)) or (p <= 0.0):
+        raise ValueError(f"'p' must be a positive float, got: {p}")
+    if (not isinstance(q, float)) or (q <= 0.0):
+        raise ValueError(f"'q' must be a positive float, got: {q}")
+
+
+    if isinstance(start_vertices, int):
+        start_vertices = [start_vertices]
+
+    if isinstance(start_vertices, list):
+        start_vertices = cudf.Series(start_vertices, dtype="int32")
+        # FIXME: Verify if this condition still holds
+        if start_vertices.dtype != "int32":
+            raise ValueError(
+                f"'start_vertices' must have int32 values, "
+                f"got: {start_vertices.dtype}"
+            )
+
+    if G.renumbered is True:
+        if isinstance(start_vertices, cudf.DataFrame):
+            start_vertices = G.lookup_internal_vertex_id(
+                start_vertices, start_vertices.columns
+            )
+        else:
+            start_vertices = G.lookup_internal_vertex_id(start_vertices)
+
+    start_vertices = ensure_valid_dtype(G, start_vertices)
+
+    vertex_set, edge_set = pylibcugraph_node2vec_random_walks(
+        resource_handle=ResourceHandle(),
+        graph=G._plc_graph,
+        seed_array=start_vertices,
+        max_depth=max_depth,
+        p=p,
+        q=q,
+        random_state=random_state
+    )
+    vertex_set = cudf.Series(vertex_set)
+    edge_set = cudf.Series(edge_set)
+
+    if G.renumbered:
+        df_ = cudf.DataFrame()
+        df_["vertex_set"] = vertex_set
+        df_ = G.unrenumber(df_, "vertex_set", preserve_order=True)
+        vertex_set = cudf.Series(df_["vertex_set"])
+    return vertex_set, edge_set, max_depth

From bb772377c5a0acb01399c75cd2f387de9e290b82 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 30 Dec 2024 20:25:18 -0800
Subject: [PATCH 17/60] deprecate legacy implementation

---
 python/cugraph/cugraph/sampling/node2vec.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py
index eb91bfec82..b5c93873fd 100644
--- a/python/cugraph/cugraph/sampling/node2vec.py
+++ b/python/cugraph/cugraph/sampling/node2vec.py
@@ -112,6 +112,12 @@ def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0)
     ...                                               True, 0.8, 0.5)
 
     """
+    warning_msg = (
+            "random_walks is deprecated and will be removed "
+            "in the next release in favor of uniform_random_walks"
+        )
+    warnings.warn(warning_msg, FutureWarning)
+
     if (not isinstance(max_depth, int)) or (max_depth < 1):
         raise ValueError(
             f"'max_depth' must be a positive integer, " f"got: {max_depth}"

From 618fe76650c93cb958eaa5c2a6f1f1638ac808a2 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 30 Dec 2024 20:30:39 -0800
Subject: [PATCH 18/60] add random state argument and update copyright

---
 python/cugraph/cugraph/sampling/node2vec_random_walks.py | 2 +-
 python/cugraph/cugraph/sampling/uniform_random_walks.py  | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/python/cugraph/cugraph/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
index 05c256c63a..530ce6ebd3 100644
--- a/python/cugraph/cugraph/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/sampling/uniform_random_walks.py b/python/cugraph/cugraph/sampling/uniform_random_walks.py
index 36ea7eeeec..0efef29bbd 100644
--- a/python/cugraph/cugraph/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/sampling/uniform_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -20,7 +20,6 @@
 
 from cugraph.structure import Graph
 
-import warnings
 from typing import Union, Tuple
 
 
@@ -28,6 +27,7 @@ def uniform_random_walks(
     G: Graph,
     start_vertices: Union[int, list, cudf.Series, cudf.DataFrame] = None,
     max_depth: int = None,
+    random_state: int = None,
 ) -> Tuple[cudf.Series, cudf.Series, Union[None, int, cudf.Series]]:
     """
     Compute uniform random walks for each nodes in 'start_vertices'.
@@ -50,6 +50,9 @@ def uniform_random_walks(
         The max depth is relative to the number of edges hence the vertex_paths size
         is max_depth + 1. For instance, a 'max_depth' of 2 with only one seed will
         result in a vertex_path of size 3.
+    
+    random_state: int, optional
+        Random seed to use when making sampling calls.
 
 
     Returns

From a1d004c454db169a9a375dc4f43dfe1d2d059061 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 15:42:31 -0800
Subject: [PATCH 19/60] update header file to take as input a random state

---
 cpp/include/cugraph_c/sampling_algorithms.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h
index ac029181ba..8c0040fff1 100644
--- a/cpp/include/cugraph_c/sampling_algorithms.h
+++ b/cpp/include/cugraph_c/sampling_algorithms.h
@@ -41,6 +41,7 @@ typedef struct {
  * @brief  Compute uniform random walks
  *
  * @param [in]  handle          Handle for accessing resources
+ * @param [in,out] rng_state    State of the random number generator, updated with each call
  * @param [in]  graph           Pointer to graph.  NOTE: Graph might be modified if the storage
  *                              needs to be transposed
  * @param [in]  start_vertices  Array of source vertices
@@ -52,6 +53,7 @@ typedef struct {
  */
 cugraph_error_code_t cugraph_uniform_random_walks(
   const cugraph_resource_handle_t* handle,
+  cugraph_rng_state_t* rng_state,
   cugraph_graph_t* graph,
   const cugraph_type_erased_device_array_view_t* start_vertices,
   size_t max_length,
@@ -62,6 +64,7 @@ cugraph_error_code_t cugraph_uniform_random_walks(
  * @brief  Compute biased random walks
  *
  * @param [in]  handle          Handle for accessing resources
+ * @param [in,out] rng_state    State of the random number generator, updated with each call
  * @param [in]  graph           Pointer to graph.  NOTE: Graph might be modified if the storage
  *                              needs to be transposed
  * @param [in]  start_vertices  Array of source vertices
@@ -73,6 +76,7 @@ cugraph_error_code_t cugraph_uniform_random_walks(
  */
 cugraph_error_code_t cugraph_biased_random_walks(
   const cugraph_resource_handle_t* handle,
+  cugraph_rng_state_t* rng_state,
   cugraph_graph_t* graph,
   const cugraph_type_erased_device_array_view_t* start_vertices,
   size_t max_length,
@@ -83,7 +87,7 @@ cugraph_error_code_t cugraph_biased_random_walks(
  * @brief  Compute random walks using the node2vec framework.
  *
  * @param [in]  handle          Handle for accessing resources
- * @param [in,out] rng_state State of the random number generator, updated with each call
+ * @param [in,out] rng_state    State of the random number generator, updated with each call
  * @param [in]  graph           Pointer to graph.  NOTE: Graph might be modified if the storage
  *                              needs to be transposed
  * @param [in]  start_vertices  Array of source vertices

From bea2a2f5d92cab3586fbc81a4b09a6840819a738 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 15:46:08 -0800
Subject: [PATCH 20/60] add support for rng state as input

---
 cpp/src/c_api/random_walks.cpp | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp
index e21090f6d0..c38db96baf 100644
--- a/cpp/src/c_api/random_walks.cpp
+++ b/cpp/src/c_api/random_walks.cpp
@@ -154,7 +154,6 @@ namespace {
 
 struct uniform_random_walks_functor : public cugraph::c_api::abstract_functor {
   raft::handle_t const& handle_;
-  //  FIXME: rng_state_ should be passed as a parameter
   cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
   cugraph::c_api::cugraph_graph_t* graph_{nullptr};
   cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr};
@@ -162,11 +161,13 @@ struct uniform_random_walks_functor : public cugraph::c_api::abstract_functor {
   cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr};
 
   uniform_random_walks_functor(cugraph_resource_handle_t const* handle,
+                               cugraph_rng_state_t* rng_state,
                                cugraph_graph_t* graph,
                                cugraph_type_erased_device_array_view_t const* start_vertices,
                                size_t max_length)
     : abstract_functor(),
       handle_(*reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle)->handle_),
+      rng_state_(reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(rng_state)),
       graph_(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)),
       start_vertices_(
         reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
@@ -224,10 +225,6 @@ struct uniform_random_walks_functor : public cugraph::c_api::abstract_functor {
         graph_view.local_vertex_partition_range_last(),
         false);
 
-      //  FIXME: remove once rng_state passed as parameter
-      rng_state_ = reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(
-        new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}});
-
       auto [paths, weights] = cugraph::uniform_random_walks(
         handle_,
         rng_state_->rng_state_,
@@ -261,7 +258,6 @@ struct uniform_random_walks_functor : public cugraph::c_api::abstract_functor {
 
 struct biased_random_walks_functor : public cugraph::c_api::abstract_functor {
   raft::handle_t const& handle_;
-  //  FIXME: rng_state_ should be passed as a parameter
   cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
   cugraph::c_api::cugraph_graph_t* graph_{nullptr};
   cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr};
@@ -269,11 +265,13 @@ struct biased_random_walks_functor : public cugraph::c_api::abstract_functor {
   cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr};
 
   biased_random_walks_functor(cugraph_resource_handle_t const* handle,
+                              cugraph_rng_state_t* rng_state,
                               cugraph_graph_t* graph,
                               cugraph_type_erased_device_array_view_t const* start_vertices,
                               size_t max_length)
     : abstract_functor(),
       handle_(*reinterpret_cast<cugraph::c_api::cugraph_resource_handle_t const*>(handle)->handle_),
+      rng_state_(reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(rng_state)),
       graph_(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)),
       start_vertices_(
         reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
@@ -293,8 +291,6 @@ struct biased_random_walks_functor : public cugraph::c_api::abstract_functor {
     // FIXME: Think about how to handle SG vice MG
     if constexpr (!cugraph::is_candidate<vertex_t, edge_t, weight_t>::value) {
       unsupported();
-    } else if constexpr (multi_gpu) {
-      unsupported();
     } else {
       // random walks expects store_transposed == false
       if constexpr (store_transposed) {
@@ -333,10 +329,6 @@ struct biased_random_walks_functor : public cugraph::c_api::abstract_functor {
         graph_view.local_vertex_partition_range_last(),
         false);
 
-      //  FIXME: remove once rng_state passed as parameter
-      rng_state_ = reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(
-        new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}});
-
       auto [paths, weights] = cugraph::biased_random_walks(
         handle_,
         rng_state_->rng_state_,
@@ -541,6 +533,7 @@ void cugraph_random_walk_result_free(cugraph_random_walk_result_t* result)
 
 cugraph_error_code_t cugraph_uniform_random_walks(
   const cugraph_resource_handle_t* handle,
+  cugraph_rng_state_t* rng_state,
   cugraph_graph_t* graph,
   const cugraph_type_erased_device_array_view_t* start_vertices,
   size_t max_length,
@@ -555,13 +548,14 @@ cugraph_error_code_t cugraph_uniform_random_walks(
                "vertex type of graph and start_vertices must match",
                *error);
 
-  uniform_random_walks_functor functor(handle, graph, start_vertices, max_length);
+  uniform_random_walks_functor functor(handle, rng_state, graph, start_vertices, max_length);
 
   return cugraph::c_api::run_algorithm(graph, functor, result, error);
 }
 
 cugraph_error_code_t cugraph_biased_random_walks(
   const cugraph_resource_handle_t* handle,
+  cugraph_rng_state_t* rng_state,
   cugraph_graph_t* graph,
   const cugraph_type_erased_device_array_view_t* start_vertices,
   size_t max_length,
@@ -576,7 +570,7 @@ cugraph_error_code_t cugraph_biased_random_walks(
                "vertex type of graph and start_vertices must match",
                *error);
 
-  biased_random_walks_functor functor(handle, graph, start_vertices, max_length);
+  biased_random_walks_functor functor(handle, rng_state, graph, start_vertices, max_length);
 
   return cugraph::c_api::run_algorithm(graph, functor, result, error);
 }

From 755acc716b3bcb94ff538436ba34ff06073a635c Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 15:51:58 -0800
Subject: [PATCH 21/60] update tests to support rng state as input

---
 cpp/tests/c_api/sg_random_walks_test.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/cpp/tests/c_api/sg_random_walks_test.c b/cpp/tests/c_api/sg_random_walks_test.c
index 71c76f3f94..8d7328cba9 100644
--- a/cpp/tests/c_api/sg_random_walks_test.c
+++ b/cpp/tests/c_api/sg_random_walks_test.c
@@ -66,8 +66,12 @@ int generic_uniform_random_walks_test(vertex_t* h_src,
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
 
+  cugraph_rng_state_t* rng_state;
+  ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
+
   ret_code =
-    cugraph_uniform_random_walks(handle, graph, d_start_view, max_depth, &result, &ret_error);
+    cugraph_uniform_random_walks(handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_random_walks failed.");
@@ -188,9 +192,13 @@ int generic_biased_random_walks_test(vertex_t* h_src,
     handle, d_start_view, (byte_t*)h_start, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
-
+  
+  cugraph_rng_state_t* rng_state;
+  ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
+  
   ret_code =
-    cugraph_biased_random_walks(handle, graph, d_start_view, max_depth, &result, &ret_error);
+    cugraph_biased_random_walks(handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_random_walks failed.");

From ef00fa5f83de00606beb24c1a99880f3536dc19f Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 16:13:37 -0800
Subject: [PATCH 22/60] add biased random walks to the PLC API

---
 .../pylibcugraph/pylibcugraph/CMakeLists.txt  |   1 +
 python/pylibcugraph/pylibcugraph/__init__.py  |   2 +
 .../pylibcugraph/_cugraph_c/algorithms.pxd    |   2 +
 .../pylibcugraph/biased_random_walks.pyx      | 150 ++++++++++++++++++
 .../pylibcugraph/uniform_random_walks.pyx     |  21 ++-
 5 files changed, 174 insertions(+), 2 deletions(-)
 create mode 100644 python/pylibcugraph/pylibcugraph/biased_random_walks.pyx

diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
index 44963bdc5e..d453c62001 100644
--- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt
+++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
@@ -58,6 +58,7 @@ set(cython_sources
     biased_neighbor_sample.pyx
     negative_sampling.pyx
     uniform_random_walks.pyx
+    biased_random_walks.pyx
     utils.pyx
     weakly_connected_components.pyx
     replicate_edgelist.pyx
diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py
index cd5b23db1a..92c6459686 100644
--- a/python/pylibcugraph/pylibcugraph/__init__.py
+++ b/python/pylibcugraph/pylibcugraph/__init__.py
@@ -76,6 +76,8 @@
 
 from pylibcugraph.uniform_random_walks import uniform_random_walks
 
+from pylibcugraph.biased_random_walks import biased_random_walks
+
 from pylibcugraph.betweenness_centrality import betweenness_centrality
 
 from pylibcugraph.induced_subgraph import induced_subgraph
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
index a8e5bb7fdc..e043b7672b 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
@@ -363,6 +363,7 @@ cdef extern from "cugraph_c/algorithms.h":
     cdef cugraph_error_code_t \
         cugraph_uniform_random_walks(
             const cugraph_resource_handle_t* handle,
+            cugraph_rng_state_t* rng_state,
             cugraph_graph_t* graph,
             const cugraph_type_erased_device_array_view_t* start_vertices,
             size_t max_length,
@@ -374,6 +375,7 @@ cdef extern from "cugraph_c/algorithms.h":
     cdef cugraph_error_code_t \
         cugraph_biased_random_walks(
             const cugraph_resource_handle_t* handle,
+            cugraph_rng_state_t* rng_state,
             cugraph_graph_t* graph,
             const cugraph_type_erased_device_array_view_t* start_vertices,
             size_t max_length,
diff --git a/python/pylibcugraph/pylibcugraph/biased_random_walks.pyx b/python/pylibcugraph/pylibcugraph/biased_random_walks.pyx
new file mode 100644
index 0000000000..2f37de7e93
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/biased_random_walks.pyx
@@ -0,0 +1,150 @@
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have cython use python 3 syntax
+# cython: language_level = 3
+
+from libc.stdint cimport uintptr_t
+
+from pylibcugraph._cugraph_c.resource_handle cimport (
+    cugraph_resource_handle_t,
+)
+from pylibcugraph._cugraph_c.error cimport (
+    cugraph_error_code_t,
+    cugraph_error_t,
+)
+from pylibcugraph._cugraph_c.array cimport (
+    cugraph_type_erased_device_array_view_t,
+    cugraph_type_erased_device_array_view_create,
+    cugraph_type_erased_device_array_view_free,
+)
+from pylibcugraph._cugraph_c.graph cimport (
+    cugraph_graph_t,
+)
+from pylibcugraph._cugraph_c.algorithms cimport (
+    cugraph_biased_random_walks,
+    cugraph_random_walk_result_t,
+    cugraph_random_walk_result_get_paths,
+    cugraph_random_walk_result_get_weights,
+    cugraph_random_walk_result_get_max_path_length,
+    cugraph_random_walk_result_free,
+)
+from pylibcugraph.resource_handle cimport (
+    ResourceHandle,
+)
+from pylibcugraph.graphs cimport (
+    _GPUGraph,
+)
+from pylibcugraph._cugraph_c.random cimport (
+    cugraph_rng_state_t
+)
+from pylibcugraph.random cimport (
+    CuGraphRandomState
+)
+from pylibcugraph.utils cimport (
+    assert_success,
+    copy_to_cupy_array,
+    assert_CAI_type,
+    get_c_type_from_numpy_type,
+)
+
+
+def biased_random_walks(ResourceHandle resource_handle,
+                         _GPUGraph input_graph,
+                         start_vertices,
+                         size_t max_length,
+                         random_state=None):
+    """
+    Compute biased random walks for each node in 'start_vertices'
+
+    Parameters
+    ----------
+    resource_handle: ResourceHandle
+        Handle to the underlying device and host resources needed for
+        referencing data and running algorithms.
+
+    input_graph : SGGraph or MGGraph
+        The input graph, for either Single or Multi-GPU operations.
+
+    start_vertices: device array type
+        Device array containing the list of starting vertices from which
+        to run the biased random walk
+
+    max_length: size_t
+        The maximum depth of the biased random walks
+    
+    random_state: int (Optional)
+        Random state to use when generating samples.  Optional argument,
+        defaults to a hash of process id, time, and hostname.
+        (See pylibcugraph.random.CuGraphRandomState)
+
+    Returns
+    -------
+    A tuple containing two device arrays and a size_t which are respectively
+    the vertex paths, the edge path weights and the maximum path length
+
+    """
+    cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
+        resource_handle.c_resource_handle_ptr
+    cdef cugraph_graph_t* c_graph_ptr = input_graph.c_graph_ptr
+
+    assert_CAI_type(start_vertices, "start_vertices")
+
+    cdef cugraph_random_walk_result_t* result_ptr
+    cdef cugraph_error_code_t error_code
+    cdef cugraph_error_t* error_ptr
+
+    cdef uintptr_t cai_start_ptr = \
+        start_vertices.__cuda_array_interface__["data"][0]
+
+    cdef cugraph_type_erased_device_array_view_t* weights_ptr
+
+    cdef cugraph_type_erased_device_array_view_t* start_ptr = \
+        cugraph_type_erased_device_array_view_create(
+            <void*>cai_start_ptr,
+            len(start_vertices),
+            get_c_type_from_numpy_type(start_vertices.dtype))
+    
+    cg_rng_state = CuGraphRandomState(resource_handle, random_state)
+    
+    cdef cugraph_rng_state_t* rng_state_ptr = \
+        cg_rng_state.rng_state_ptr
+
+    error_code = cugraph_biased_random_walks(
+        c_resource_handle_ptr,
+        rng_state_ptr,
+        c_graph_ptr,
+        start_ptr,
+        max_length,
+        &result_ptr,
+        &error_ptr)
+    assert_success(error_code, error_ptr, "cugraph_biased_random_walks")
+
+    cdef cugraph_type_erased_device_array_view_t* path_ptr = \
+        cugraph_random_walk_result_get_paths(result_ptr)
+
+    if input_graph.weights_view_ptr is NULL and input_graph.weights_view_ptr_ptr is NULL:
+        cupy_weights = None
+    else:
+        weights_ptr = cugraph_random_walk_result_get_weights(result_ptr)
+        cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr)
+
+    max_path_length = \
+        cugraph_random_walk_result_get_max_path_length(result_ptr)
+
+    cupy_paths = copy_to_cupy_array(c_resource_handle_ptr, path_ptr)
+
+    cugraph_random_walk_result_free(result_ptr)
+    cugraph_type_erased_device_array_view_free(start_ptr)
+
+    return (cupy_paths, cupy_weights, max_path_length)
diff --git a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx
index 677695f93a..95379254e4 100644
--- a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx
+++ b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx
@@ -45,6 +45,12 @@ from pylibcugraph.resource_handle cimport (
 from pylibcugraph.graphs cimport (
     _GPUGraph,
 )
+from pylibcugraph._cugraph_c.random cimport (
+    cugraph_rng_state_t
+)
+from pylibcugraph.random cimport (
+    CuGraphRandomState
+)
 from pylibcugraph.utils cimport (
     assert_success,
     copy_to_cupy_array,
@@ -56,7 +62,8 @@ from pylibcugraph.utils cimport (
 def uniform_random_walks(ResourceHandle resource_handle,
                          _GPUGraph input_graph,
                          start_vertices,
-                         size_t max_length):
+                         size_t max_length,
+                         random_state=None):
     """
     Compute uniform random walks for each nodes in 'start_vertices'
 
@@ -75,7 +82,11 @@ def uniform_random_walks(ResourceHandle resource_handle,
 
     max_length: size_t
         The maximum depth of the uniform random walks
-
+    
+    random_state: int (Optional)
+        Random state to use when generating samples.  Optional argument,
+        defaults to a hash of process id, time, and hostname.
+        (See pylibcugraph.random.CuGraphRandomState)
 
     Returns
     -------
@@ -103,9 +114,15 @@ def uniform_random_walks(ResourceHandle resource_handle,
             <void*>cai_start_ptr,
             len(start_vertices),
             get_c_type_from_numpy_type(start_vertices.dtype))
+    
+    cg_rng_state = CuGraphRandomState(resource_handle, random_state)
+    
+    cdef cugraph_rng_state_t* rng_state_ptr = \
+        cg_rng_state.rng_state_ptr
 
     error_code = cugraph_uniform_random_walks(
         c_resource_handle_ptr,
+        rng_state_ptr,
         c_graph_ptr,
         start_ptr,
         max_length,

From ae4833ca728b03a8959537722271407722fee97d Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 16:34:22 -0800
Subject: [PATCH 23/60] add biased random walks to the python API

---
 python/cugraph/cugraph/__init__.py            |   1 +
 python/cugraph/cugraph/sampling/__init__.py   |   1 +
 .../cugraph/sampling/biased_random_walks.py   | 124 ++++++++++++++++++
 .../cugraph/sampling/uniform_random_walks.py  |  47 +------
 4 files changed, 127 insertions(+), 46 deletions(-)
 create mode 100644 python/cugraph/cugraph/sampling/biased_random_walks.py

diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py
index 3f960433ca..da26971cee 100644
--- a/python/cugraph/cugraph/__init__.py
+++ b/python/cugraph/cugraph/__init__.py
@@ -109,6 +109,7 @@
 from cugraph.sampling import (
     random_walks,
     uniform_random_walks,
+    biased_random_walks,
     rw_path,
     node2vec,
     uniform_neighbor_sample,
diff --git a/python/cugraph/cugraph/sampling/__init__.py b/python/cugraph/cugraph/sampling/__init__.py
index 723457d62c..16d68a9710 100644
--- a/python/cugraph/cugraph/sampling/__init__.py
+++ b/python/cugraph/cugraph/sampling/__init__.py
@@ -13,5 +13,6 @@
 
 from cugraph.sampling.random_walks import random_walks, rw_path
 from cugraph.sampling.uniform_random_walks import uniform_random_walks
+from cugraph.sampling.biased_random_walks import biased_random_walks
 from cugraph.sampling.node2vec import node2vec
 from cugraph.sampling.uniform_neighbor_sample import uniform_neighbor_sample
diff --git a/python/cugraph/cugraph/sampling/biased_random_walks.py b/python/cugraph/cugraph/sampling/biased_random_walks.py
new file mode 100644
index 0000000000..65b6524197
--- /dev/null
+++ b/python/cugraph/cugraph/sampling/biased_random_walks.py
@@ -0,0 +1,124 @@
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cudf
+import cupy as cp
+from pylibcugraph import ResourceHandle
+from pylibcugraph import (
+    biased_random_walks as pylibcugraph_biased_random_walks,
+)
+
+from cugraph.structure import Graph
+
+from typing import Union, Tuple
+
+
+def biased_random_walks(
+    G: Graph,
+    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame] = None,
+    max_depth: int = None,
+    random_state: int = None,
+) -> Tuple[cudf.Series, cudf.Series, Union[None, int, cudf.Series]]:
+    """
+    Compute biased random walks for each node in 'start_vertices'.
+    Vertices with no outgoing edges will be padded with -1 and the corresponding
+    edge weights with 0.0.
+
+    parameters
+    ----------
+    G : cuGraph.Graph
+        The graph can be either directed or undirected.
+
+    start_vertices : int or list or cudf.Series or cudf.DataFrame
+        A single node or a list or a cudf.Series of nodes from which to run
+        the random walks. In case of multi-column vertices it should be
+        a cudf.DataFrame
+
+    max_depth : int
+        The maximum depth of the random walks
+
+        The max depth is relative to the number of edges hence the vertex_paths size
+        is max_depth + 1. For instance, a 'max_depth' of 2 with only one seed will
+        result in a vertex_path of size 3.
+    
+    random_state: int, optional
+        Random seed to use when making sampling calls.
+
+
+    Returns
+    -------
+    vertex_paths : cudf.Series or cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    and
+
+    max_path_length : int
+        The maximum path length.
+
+    Examples
+    --------
+    >>> from cugraph.datasets import karate
+    >>> M = karate.get_edgelist(download=True)
+    >>> G = karate.get_graph()
+    >>> start_vertices = G.nodes()[:4]
+    >>> _, _, _ = cugraph.biased_random_walks(G, start_vertices, 3)
+
+    """
+
+    if max_depth is None:
+        raise TypeError("must specify a 'max_depth'")
+
+    if isinstance(start_vertices, int):
+        start_vertices = [start_vertices]
+
+    if isinstance(start_vertices, list):
+        # Ensure the 'start_vertices' have the same dtype as the edge list.
+        # Failing to do that may produce erroneous results.
+        vertex_dtype = G.edgelist.edgelist_df.dtypes.iloc[0]
+        start_vertices = cudf.Series(start_vertices, dtype=vertex_dtype)
+
+    if G.renumbered is True:
+        if isinstance(start_vertices, cudf.DataFrame):
+            start_vertices = G.lookup_internal_vertex_id(
+                start_vertices, start_vertices.columns
+            )
+        else:
+            start_vertices = G.lookup_internal_vertex_id(start_vertices)
+
+    vertex_paths, edge_wgt_paths, max_path_length = pylibcugraph_biased_random_walks(
+        resource_handle=ResourceHandle(),
+        input_graph=G._plc_graph,
+        start_vertices=start_vertices,
+        max_length=max_depth,
+        random_state=random_state
+    )
+
+    vertex_paths = cudf.Series(vertex_paths)
+
+    if G.renumbered:
+        df_ = cudf.DataFrame()
+        df_["vertex_paths"] = vertex_paths
+        df_ = G.unrenumber(df_, "vertex_paths", preserve_order=True)
+        vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
+
+    edge_wgt_paths = cudf.Series(edge_wgt_paths)
+
+    return (
+        vertex_paths,
+        edge_wgt_paths,
+        max_path_length,
+    )
diff --git a/python/cugraph/cugraph/sampling/uniform_random_walks.py b/python/cugraph/cugraph/sampling/uniform_random_walks.py
index 0efef29bbd..160bdc9c9f 100644
--- a/python/cugraph/cugraph/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/sampling/uniform_random_walks.py
@@ -104,6 +104,7 @@ def uniform_random_walks(
         input_graph=G._plc_graph,
         start_vertices=start_vertices,
         max_length=max_depth,
+        random_state=random_state
     )
 
     vertex_paths = cudf.Series(vertex_paths)
@@ -121,49 +122,3 @@ def uniform_random_walks(
         edge_wgt_paths,
         max_path_length,
     )
-
-
-def rw_path(
-    num_paths: int, sizes: cudf.Series
-) -> Tuple[cudf.Series, cudf.Series, cudf.Series]:
-    """
-    Retrieve more information on the obtained paths in case use_padding
-    is False.
-
-    parameters
-    ----------
-    num_paths: int
-        Number of paths in the random walk output.
-
-    sizes: cudf.Series
-        Path size returned in random walk output.
-
-    Returns
-    -------
-    path_data : cudf.DataFrame
-        Dataframe containing vertex path offsets, edge weight offsets and
-        edge weight sizes for each path.
-    """
-
-    vertex_offsets = cudf.Series(0, dtype=sizes.dtype)
-    vertex_offsets = cudf.concat(
-        [vertex_offsets, sizes.cumsum()[:-1]], ignore_index=True
-    )
-    weight_sizes = sizes - 1
-
-    weight_offsets = cudf.Series(0, dtype=sizes.dtype)
-    num_edges = vertex_offsets.diff()[1:] - 1
-
-    weight_offsets = cudf.concat(
-        [weight_offsets, num_edges.cumsum()], ignore_index=True
-    )
-    # FIXME: CUDF bug. concatenating two series of type int32 but get a CUDF of
-    # type 'int64' have to cast the results
-    weight_offsets = weight_offsets.astype(sizes.dtype)
-
-    path_data = cudf.DataFrame()
-    path_data["vertex_offsets"] = vertex_offsets
-    path_data["weight_sizes"] = weight_sizes
-    path_data["weight_offsets"] = weight_offsets
-
-    return path_data[:num_paths]

From 8314291b37b31e0479db4478c34825361cf49ba8 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 17:32:24 -0800
Subject: [PATCH 24/60] update docstrings and init file

---
 python/cugraph/cugraph/__init__.py                   |  1 +
 python/cugraph/cugraph/dask/__init__.py              |  2 ++
 .../cugraph/dask/sampling/node2vec_random_walks.py   | 12 +++++++++---
 python/cugraph/cugraph/sampling/__init__.py          |  1 +
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py
index da26971cee..4f2c47cf41 100644
--- a/python/cugraph/cugraph/__init__.py
+++ b/python/cugraph/cugraph/__init__.py
@@ -110,6 +110,7 @@
     random_walks,
     uniform_random_walks,
     biased_random_walks,
+    node2vec_random_walks,
     rw_path,
     node2vec,
     uniform_neighbor_sample,
diff --git a/python/cugraph/cugraph/dask/__init__.py b/python/cugraph/cugraph/dask/__init__.py
index b8753fc461..3b63e0ebff 100644
--- a/python/cugraph/cugraph/dask/__init__.py
+++ b/python/cugraph/cugraph/dask/__init__.py
@@ -28,6 +28,8 @@
 from .components.connectivity import weakly_connected_components
 from .sampling.uniform_neighbor_sample import uniform_neighbor_sample
 from .sampling.random_walks import random_walks
+from .sampling.uniform_random_walks import uniform_random_walks
+from .sampling.biased_random_walks import biased_random_walks
 from .sampling.node2vec_random_walks import node2vec_random_walks
 from .centrality.eigenvector_centrality import eigenvector_centrality
 from .cores.core_number import core_number
diff --git a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
index 18171eda62..3e42880a7d 100644
--- a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
@@ -71,8 +71,9 @@ def node2vec_random_walks(
     random_state=None
 ):
     """
-    Computes random walks for each node in 'start_vertices', under the
-    node2vec sampling framework.
+    Compute random walks under the node2vec sampling framework for each node in
+    'start_vertices' and return a padded result along with the maximum path length.
+    Vertices with no outgoing edges will be padded with -1.
 
     parameters
     ----------
@@ -112,6 +113,11 @@ def node2vec_random_walks(
     edge_weight_paths: dask_cudf.Series
         Series containing the edge weights of edges represented by the
         returned vertex_paths
+    
+    and
+
+    max_path_length : int
+        The maximum path length.
     """
     client = default_client()
 
@@ -194,5 +200,5 @@ def node2vec_random_walks(
         ]
     )
 
-    return ddf_vertex_paths, ddf_edge_wgt_paths
+    return ddf_vertex_paths, ddf_edge_wgt_paths, max_depth
     
diff --git a/python/cugraph/cugraph/sampling/__init__.py b/python/cugraph/cugraph/sampling/__init__.py
index 16d68a9710..495483d135 100644
--- a/python/cugraph/cugraph/sampling/__init__.py
+++ b/python/cugraph/cugraph/sampling/__init__.py
@@ -14,5 +14,6 @@
 from cugraph.sampling.random_walks import random_walks, rw_path
 from cugraph.sampling.uniform_random_walks import uniform_random_walks
 from cugraph.sampling.biased_random_walks import biased_random_walks
+from cugraph.sampling.node2vec_random_walks import node2vec_random_walks
 from cugraph.sampling.node2vec import node2vec
 from cugraph.sampling.uniform_neighbor_sample import uniform_neighbor_sample

From 1603bcd7747a306a5a7dcadc8f2068e535f7eeb3 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 17:34:00 -0800
Subject: [PATCH 25/60] fix typo

---
 python/cugraph/cugraph/sampling/node2vec_random_walks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cugraph/cugraph/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
index 530ce6ebd3..3f8a6bdb66 100644
--- a/python/cugraph/cugraph/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
@@ -13,7 +13,7 @@
 
 from pylibcugraph import (
     ResourceHandle,
-    node2vec as pylibcugraph_node2vec_random_walks,
+    node2vec_random_walks as pylibcugraph_node2vec_random_walks,
 )
 import warnings
 
@@ -41,7 +41,7 @@ def ensure_valid_dtype(input_graph, start_vertices):
     return start_vertices
 
 
-def node2vec(G, start_vertices, max_depth=1, p=1.0, q=1.0, random_state=None):
+def node2vec_random_walks(G, start_vertices, max_depth=1, p=1.0, q=1.0, random_state=None):
     """
     Computes random walks for each node in 'start_vertices', under the
     node2vec sampling framework.

From 0a03b290542538dd4ab1c02832404f05a4498935 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 17:36:29 -0800
Subject: [PATCH 26/60] update copyright

---
 python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
index 3e42880a7d..2ba3a2d238 100644
--- a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 88e405df171c748da7973ec2ae0b2f77aad55c2f Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 31 Dec 2024 17:39:11 -0800
Subject: [PATCH 27/60] add mg implementation of biased and uniform random
 walks

---
 .../dask/sampling/biased_random_walks.py      | 171 ++++++++++++++++++
 .../dask/sampling/uniform_random_walks.py     | 171 ++++++++++++++++++
 2 files changed, 342 insertions(+)
 create mode 100644 python/cugraph/cugraph/dask/sampling/biased_random_walks.py
 create mode 100644 python/cugraph/cugraph/dask/sampling/uniform_random_walks.py

diff --git a/python/cugraph/cugraph/dask/sampling/biased_random_walks.py b/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
new file mode 100644
index 0000000000..a4dab3578a
--- /dev/null
+++ b/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
@@ -0,0 +1,171 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dask.distributed import wait, default_client
+import dask_cudf
+import cudf
+import operator as op
+from cugraph.dask.common.part_utils import (
+    persist_dask_df_equal_parts_per_worker,
+)
+
+from pylibcugraph import ResourceHandle
+
+from pylibcugraph import (
+    biased_random_walks as pylibcugraph_biased_random_walks,
+)
+
+from cugraph.dask.comms import comms as Comms
+
+
+def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
+    """
+    Build a cudf Series from a cupy array returned by the pylibcugraph wrapper.
+
+    Non-empty vertex paths are unrenumbered first when the number map is numbered.
+    """
+    needs_unrenumber = is_vertex_paths and len(cp_paths) > 0
+    if needs_unrenumber and number_map.implementation.numbered:
+        paths_df = cudf.DataFrame()
+        paths_df["vertex_paths"] = cp_paths
+        paths_df = number_map.unrenumber(
+            paths_df, "vertex_paths", preserve_order=True
+        ).compute()
+        # Nulls in the unrenumbered column are replaced with -1.
+        return cudf.Series(paths_df["vertex_paths"]).fillna(-1)
+
+    return cudf.Series(cp_paths)
+
+
+def _call_plc_biased_random_walks(sID, mg_graph_x, st_x, max_depth, random_state):
+    """Call pylibcugraph biased_random_walks with the comms handle of session sID."""
+    return pylibcugraph_biased_random_walks(
+        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+        input_graph=mg_graph_x,
+        start_vertices=st_x,
+        max_length=max_depth,
+        random_state=random_state,
+    )
+
+
+def biased_random_walks(
+    input_graph,
+    start_vertices=None,
+    max_depth=None,
+    random_state=None,
+):
+    """
+    Compute random walks under the biased sampling framework for each node in
+    'start_vertices' and return a padded result along with the maximum path length.
+    Vertices with no outgoing edges will be padded with -1.
+
+    Parameters
+    ----------
+    input_graph : cuGraph.Graph
+        The graph can be either directed or undirected.
+
+    start_vertices : int or list or cudf.Series or cudf.DataFrame
+        A single node or a list or a cudf.Series of nodes from which to run
+        the random walks. In case of multi-column vertices it should be
+        a cudf.DataFrame
+
+    max_depth : int
+        The maximum depth of the random walks
+
+    random_state: int, optional
+        Random seed to use when making sampling calls.
+
+    Returns
+    -------
+    vertex_paths : dask_cudf.Series or dask_cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: dask_cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    max_path_length : int
+        The maximum path length, i.e. the 'max_depth' that was passed in
+    """
+    client = default_client()
+    if isinstance(start_vertices, int):
+        start_vertices = [start_vertices]
+
+    if isinstance(start_vertices, list):
+        start_vertices = cudf.Series(start_vertices)
+
+    # start_vertices uses "external" vertex IDs, but if the graph has been
+    # renumbered, the start vertex IDs must also be renumbered.
+    if input_graph.renumbered:
+        # FIXME: This should match start_vertices type to the renumbered df type
+        # but verify that. If not retrieve the type and cast it when creating
+        # the dask_cudf from a cudf
+        start_vertices = input_graph.lookup_internal_vertex_id(start_vertices).compute()
+        start_vertices_type = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
+    else:
+        # FIXME: Get the 'src' column names instead and retrieve the type
+        start_vertices_type = input_graph.input_df.dtypes.iloc[0]
+    start_vertices = dask_cudf.from_cudf(
+        start_vertices, npartitions=min(input_graph._npartitions, len(start_vertices))
+    )
+    start_vertices = start_vertices.astype(start_vertices_type)
+    start_vertices = persist_dask_df_equal_parts_per_worker(
+        start_vertices, client, return_type="dict"
+    )
+
+    # Submit one task per worker; each task is pinned to that worker.
+    result = [
+        client.submit(
+            _call_plc_biased_random_walks,
+            Comms.get_session_id(),
+            input_graph._plc_graph[w],
+            start_v[0] if start_v else cudf.Series(dtype=start_vertices_type),
+            max_depth,
+            random_state=random_state,
+            workers=[w],
+            allow_other_workers=False,
+        )
+        for w, start_v in start_vertices.items()
+    ]
+
+    wait(result)
+
+    result_vertex_paths = [client.submit(op.getitem, f, 0) for f in result]
+    result_edge_wgt_paths = [client.submit(op.getitem, f, 1) for f in result]
+
+    cudf_vertex_paths = [
+        client.submit(convert_to_cudf, cp_vertex_paths, input_graph.renumber_map, True)
+        for cp_vertex_paths in result_vertex_paths
+    ]
+
+    cudf_edge_wgt_paths = [
+        client.submit(convert_to_cudf, cp_edge_wgt_paths)
+        for cp_edge_wgt_paths in result_edge_wgt_paths
+    ]
+
+    wait([cudf_vertex_paths, cudf_edge_wgt_paths])
+
+    ddf_vertex_paths = dask_cudf.from_delayed(cudf_vertex_paths).persist()
+    ddf_edge_wgt_paths = dask_cudf.from_delayed(cudf_edge_wgt_paths).persist()
+    wait([ddf_vertex_paths, ddf_edge_wgt_paths])
+
+    # Wait until the inactive futures are released
+    wait(
+        [
+            (r.release(), c_v.release(), c_e.release())
+            for r, c_v, c_e in zip(result, cudf_vertex_paths, cudf_edge_wgt_paths)
+        ]
+    )
+
+    return ddf_vertex_paths, ddf_edge_wgt_paths, max_depth
diff --git a/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py b/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
new file mode 100644
index 0000000000..ba571a03e8
--- /dev/null
+++ b/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
@@ -0,0 +1,171 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dask.distributed import wait, default_client
+import dask_cudf
+import cudf
+import operator as op
+from cugraph.dask.common.part_utils import (
+    persist_dask_df_equal_parts_per_worker,
+)
+
+from pylibcugraph import ResourceHandle
+
+from pylibcugraph import (
+    uniform_random_walks as pylibcugraph_uniform_random_walks,
+)
+
+from cugraph.dask.comms import comms as Comms
+
+
+def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
+    """
+    Build a cudf Series from a cupy array returned by the pylibcugraph wrapper.
+
+    Non-empty vertex paths are unrenumbered first when the number map is numbered.
+    """
+    needs_unrenumber = is_vertex_paths and len(cp_paths) > 0
+    if needs_unrenumber and number_map.implementation.numbered:
+        paths_df = cudf.DataFrame()
+        paths_df["vertex_paths"] = cp_paths
+        paths_df = number_map.unrenumber(
+            paths_df, "vertex_paths", preserve_order=True
+        ).compute()
+        # Nulls in the unrenumbered column are replaced with -1.
+        return cudf.Series(paths_df["vertex_paths"]).fillna(-1)
+
+    return cudf.Series(cp_paths)
+
+
+def _call_plc_uniform_random_walks(sID, mg_graph_x, st_x, max_depth, random_state):
+    """Call pylibcugraph uniform_random_walks with the comms handle of session sID."""
+    return pylibcugraph_uniform_random_walks(
+        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+        input_graph=mg_graph_x,
+        start_vertices=st_x,
+        max_length=max_depth,
+        random_state=random_state,
+    )
+
+
+def uniform_random_walks(
+    input_graph,
+    start_vertices=None,
+    max_depth=None,
+    random_state=None,
+):
+    """
+    Compute random walks under the uniform sampling framework for each node in
+    'start_vertices' and return a padded result along with the maximum path length.
+    Vertices with no outgoing edges will be padded with -1.
+
+    Parameters
+    ----------
+    input_graph : cuGraph.Graph
+        The graph can be either directed or undirected.
+
+    start_vertices : int or list or cudf.Series or cudf.DataFrame
+        A single node or a list or a cudf.Series of nodes from which to run
+        the random walks. In case of multi-column vertices it should be
+        a cudf.DataFrame
+
+    max_depth : int
+        The maximum depth of the random walks
+
+    random_state: int, optional
+        Random seed to use when making sampling calls.
+
+    Returns
+    -------
+    vertex_paths : dask_cudf.Series or dask_cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: dask_cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    max_path_length : int
+        The maximum path length, i.e. the 'max_depth' that was passed in
+    """
+    client = default_client()
+    if isinstance(start_vertices, int):
+        start_vertices = [start_vertices]
+
+    if isinstance(start_vertices, list):
+        start_vertices = cudf.Series(start_vertices)
+
+    # start_vertices uses "external" vertex IDs, but if the graph has been
+    # renumbered, the start vertex IDs must also be renumbered.
+    if input_graph.renumbered:
+        # FIXME: This should match start_vertices type to the renumbered df type
+        # but verify that. If not retrieve the type and cast it when creating
+        # the dask_cudf from a cudf
+        start_vertices = input_graph.lookup_internal_vertex_id(start_vertices).compute()
+        start_vertices_type = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
+    else:
+        # FIXME: Get the 'src' column names instead and retrieve the type
+        start_vertices_type = input_graph.input_df.dtypes.iloc[0]
+    start_vertices = dask_cudf.from_cudf(
+        start_vertices, npartitions=min(input_graph._npartitions, len(start_vertices))
+    )
+    start_vertices = start_vertices.astype(start_vertices_type)
+    start_vertices = persist_dask_df_equal_parts_per_worker(
+        start_vertices, client, return_type="dict"
+    )
+
+    # Submit one task per worker; each task is pinned to that worker.
+    result = [
+        client.submit(
+            _call_plc_uniform_random_walks,
+            Comms.get_session_id(),
+            input_graph._plc_graph[w],
+            start_v[0] if start_v else cudf.Series(dtype=start_vertices_type),
+            max_depth,
+            random_state=random_state,
+            workers=[w],
+            allow_other_workers=False,
+        )
+        for w, start_v in start_vertices.items()
+    ]
+
+    wait(result)
+
+    result_vertex_paths = [client.submit(op.getitem, f, 0) for f in result]
+    result_edge_wgt_paths = [client.submit(op.getitem, f, 1) for f in result]
+
+    cudf_vertex_paths = [
+        client.submit(convert_to_cudf, cp_vertex_paths, input_graph.renumber_map, True)
+        for cp_vertex_paths in result_vertex_paths
+    ]
+
+    cudf_edge_wgt_paths = [
+        client.submit(convert_to_cudf, cp_edge_wgt_paths)
+        for cp_edge_wgt_paths in result_edge_wgt_paths
+    ]
+
+    wait([cudf_vertex_paths, cudf_edge_wgt_paths])
+
+    ddf_vertex_paths = dask_cudf.from_delayed(cudf_vertex_paths).persist()
+    ddf_edge_wgt_paths = dask_cudf.from_delayed(cudf_edge_wgt_paths).persist()
+    wait([ddf_vertex_paths, ddf_edge_wgt_paths])
+
+    # Wait until the inactive futures are released
+    wait(
+        [
+            (r.release(), c_v.release(), c_e.release())
+            for r, c_v, c_e in zip(result, cudf_vertex_paths, cudf_edge_wgt_paths)
+        ]
+    )
+
+    return ddf_vertex_paths, ddf_edge_wgt_paths, max_depth

From c8265e7cbb385d788ae862ccf11e9ab827a96b19 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 2 Jan 2025 08:05:50 -0800
Subject: [PATCH 28/60] update docstrings

---
 python/cugraph/cugraph/sampling/node2vec.py     |  4 ++--
 python/cugraph/cugraph/sampling/random_walks.py | 12 ++++++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py
index b5c93873fd..2a5c0f9389 100644
--- a/python/cugraph/cugraph/sampling/node2vec.py
+++ b/python/cugraph/cugraph/sampling/node2vec.py
@@ -113,8 +113,8 @@ def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0)
 
     """
     warning_msg = (
-            "random_walks is deprecated and will be removed "
-            "in the next release in favor of uniform_random_walks"
+            "node2vec is deprecated and will be removed "
+            "in the next release in favor of node2vec_random_walks"
         )
     warnings.warn(warning_msg, FutureWarning)
 
diff --git a/python/cugraph/cugraph/sampling/random_walks.py b/python/cugraph/cugraph/sampling/random_walks.py
index f0177d92c4..eaf0581724 100644
--- a/python/cugraph/cugraph/sampling/random_walks.py
+++ b/python/cugraph/cugraph/sampling/random_walks.py
@@ -97,10 +97,18 @@ def random_walks(
     use_padding : bool, optional (default=False)
         If True, padded paths are returned else coalesced paths are returned.
 
+        Deprecated: only padded paths will be returned in the results.
+
     legacy_result_type : bool, optional (default=True)
         If True, will return a tuple of vertex_paths, edge_weight_paths and
-        sizes. If False, will return a tuple of vertex_paths, vertex_paths and
-        max_path_length
+        sizes where the 'max_depth' is proportional to the number of vertices.
+        If False, will return a tuple of vertex_paths, edge_weight_paths and
+        max_path_length where the 'max_depth' is proportional to the number of
+        edges.
+
+        Deprecated: only padded paths will be returned where the 'max_depth'
+        is proportional to the number of edges instead of the number of
+        vertices when 'legacy_result_type' is 'True'.
 
     Returns
     -------

From 0a4d29baf994d6632f59db8a1cc3246b94cce464 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 2 Jan 2025 09:52:18 -0800
Subject: [PATCH 29/60] deprecate legacy implementation

---
 python/cugraph/cugraph/dask/sampling/random_walks.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/cugraph/cugraph/dask/sampling/random_walks.py b/python/cugraph/cugraph/dask/sampling/random_walks.py
index 99996153d3..07dfe93b16 100644
--- a/python/cugraph/cugraph/dask/sampling/random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/random_walks.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import warnings
 from dask.distributed import wait, default_client
 import dask_cudf
 import cudf
@@ -106,6 +107,13 @@ def random_walks(
     max_path_length : int
         The maximum path length
     """
+
+    warning_msg = (
+            "random_walks is deprecated and will be removed "
+            "in the next release in favor of uniform_random_walks"
+        )
+    warnings.warn(warning_msg, FutureWarning)
+
     client = default_client()
     if isinstance(start_vertices, int):
         start_vertices = [start_vertices]

From 4e0eff9e7ab68e3f3328779104aa2c176e974686 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 2 Jan 2025 13:13:30 -0800
Subject: [PATCH 30/60] remove unused import

---
 python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
index d1c4760d1f..59e7bd96c4 100644
--- a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
+++ b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
@@ -17,7 +17,6 @@
 from libc.stdint cimport uintptr_t
 
 from pylibcugraph._cugraph_c.resource_handle cimport (
-    bool_t,
     cugraph_resource_handle_t,
 )
 from pylibcugraph._cugraph_c.error cimport (

From 7c85269f8d3d0774d05741e8e593b5b4cd87fc2c Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 10 Jan 2025 11:27:09 -0800
Subject: [PATCH 31/60] update MG C tests

---
 cpp/tests/c_api/mg_random_walks_test.c | 27 +++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/cpp/tests/c_api/mg_random_walks_test.c b/cpp/tests/c_api/mg_random_walks_test.c
index 13252e0f1d..525d340148 100644
--- a/cpp/tests/c_api/mg_random_walks_test.c
+++ b/cpp/tests/c_api/mg_random_walks_test.c
@@ -62,8 +62,15 @@ int generic_uniform_random_walks_test(const cugraph_resource_handle_t* handle,
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
 
+  int rank = cugraph_resource_handle_get_rank(handle);
+  cugraph_rng_state_t* rng_state;
+  ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
   ret_code =
-    cugraph_uniform_random_walks(handle, graph, d_start_view, max_depth, &result, &ret_error);
+    cugraph_uniform_random_walks(
+      handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_random_walks failed.");
@@ -173,7 +180,14 @@ int generic_biased_random_walks_test(const cugraph_resource_handle_t* handle,
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
 
-  ret_code = cugraph_biased_random_walks(handle, graph, d_start_view, FALSE, &result, &ret_error);
+  int rank = cugraph_resource_handle_get_rank(handle);
+  cugraph_rng_state_t* rng_state;
+  ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  ret_code = cugraph_biased_random_walks(
+    handle, rng_state, graph, d_start_view, FALSE, &result, &ret_error);
 
 #if 1
   TEST_ASSERT(test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_random_walks should have failed")
@@ -277,8 +291,15 @@ int generic_node2vec_random_walks_test(const cugraph_resource_handle_t* handle,
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
 
+  int rank = cugraph_resource_handle_get_rank(handle);
+  cugraph_rng_state_t* rng_state;
+  ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
   ret_code =
-    cugraph_node2vec_random_walks(handle, graph, d_start_view, FALSE, p, q, &result, &ret_error);
+    cugraph_node2vec_random_walks(
+      handle, rng_state, graph, d_start_view, FALSE, p, q, &result, &ret_error);
 
 #if 1
   TEST_ASSERT(

From 067d53bbd8891232c0835047f5ef2998274e7af4 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 10 Jan 2025 12:30:26 -0800
Subject: [PATCH 32/60] remove unused variable and update the number of arrays
 passed at the graph creation

---
 cpp/tests/c_api/mg_test_utils.cpp | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/cpp/tests/c_api/mg_test_utils.cpp b/cpp/tests/c_api/mg_test_utils.cpp
index f96be61468..be6e869cf5 100644
--- a/cpp/tests/c_api/mg_test_utils.cpp
+++ b/cpp/tests/c_api/mg_test_utils.cpp
@@ -158,8 +158,6 @@ extern "C" int create_mg_test_graph(const cugraph_resource_handle_t* handle,
 
   rank = cugraph_resource_handle_get_rank(handle);
 
-  size_t original_num_edges = num_edges;
-
   if (rank != 0) num_edges = 0;
 
   ret_code =
@@ -191,6 +189,7 @@ extern "C" int create_mg_test_graph(const cugraph_resource_handle_t* handle,
     handle, wgt_view, (byte_t*)h_wgt, ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
 
+  printf("\n in 'mg_test_utils'\n");
   ret_code = cugraph_graph_create_mg(handle,
                                      &properties,
                                      NULL,
@@ -200,14 +199,15 @@ extern "C" int create_mg_test_graph(const cugraph_resource_handle_t* handle,
                                      NULL,
                                      NULL,
                                      store_transposed,
-                                     original_num_edges,  // UNUSED
-                                     FALSE,
+                                     size_t{1}, // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
+                                     TRUE,
                                      p_graph,
                                      ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
+  printf("\n completed 'mg_test_utils'\n");
 
   cugraph_type_erased_device_array_view_free(wgt_view);
   cugraph_type_erased_device_array_view_free(dst_view);
@@ -256,8 +256,6 @@ extern "C" int create_mg_test_graph_double(const cugraph_resource_handle_t* hand
 
   rank = cugraph_resource_handle_get_rank(handle);
 
-  size_t original_num_edges = num_edges;
-
   if (rank != 0) num_edges = 0;
 
   ret_code =
@@ -298,7 +296,7 @@ extern "C" int create_mg_test_graph_double(const cugraph_resource_handle_t* hand
                                      NULL,
                                      NULL,
                                      store_transposed,
-                                     original_num_edges,  // UNUSED
+                                     size_t{1}, // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
@@ -349,8 +347,6 @@ extern "C" int create_mg_test_graph_with_edge_ids(const cugraph_resource_handle_
 
   rank = cugraph_resource_handle_get_rank(handle);
 
-  size_t original_num_edges = num_edges;
-
   if (rank != 0) num_edges = 0;
 
   ret_code =
@@ -390,7 +386,7 @@ extern "C" int create_mg_test_graph_with_edge_ids(const cugraph_resource_handle_
                                      &idx_view,
                                      NULL,
                                      store_transposed,
-                                     original_num_edges,  // UNUSED
+                                     size_t{1}, // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
@@ -449,8 +445,6 @@ extern "C" int create_mg_test_graph_with_properties(const cugraph_resource_handl
 
   rank = cugraph_resource_handle_get_rank(handle);
 
-  size_t original_num_edges = num_edges;
-
   if (rank != 0) num_edges = 0;
 
   ret_code =
@@ -517,7 +511,7 @@ extern "C" int create_mg_test_graph_with_properties(const cugraph_resource_handl
                                      &idx_view,
                                      &type_view,
                                      store_transposed,
-                                     original_num_edges,  // UNUSED
+                                     size_t{1}, // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
@@ -581,8 +575,6 @@ int create_mg_test_graph_new(const cugraph_resource_handle_t* handle,
 
   rank = cugraph_resource_handle_get_rank(handle);
 
-  size_t original_num_edges = num_edges;
-
   if (rank != 0) num_edges = 0;
 
   ret_code =
@@ -650,7 +642,7 @@ int create_mg_test_graph_new(const cugraph_resource_handle_t* handle,
                                      &edge_id_view,
                                      &edge_type_view,
                                      store_transposed,
-                                     original_num_edges,  // UNUSED
+                                     size_t{1}, // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,

From b94a6eaadd131f6157342495bd9cda12505ef13f Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 10 Jan 2025 16:35:07 -0800
Subject: [PATCH 33/60] update copyright and remove debug print

---
 cpp/tests/c_api/mg_test_utils.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cpp/tests/c_api/mg_test_utils.cpp b/cpp/tests/c_api/mg_test_utils.cpp
index be6e869cf5..4767ac9853 100644
--- a/cpp/tests/c_api/mg_test_utils.cpp
+++ b/cpp/tests/c_api/mg_test_utils.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -189,7 +189,6 @@ extern "C" int create_mg_test_graph(const cugraph_resource_handle_t* handle,
     handle, wgt_view, (byte_t*)h_wgt, ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "wgt copy_from_host failed.");
 
-  printf("\n in 'mg_test_utils'\n");
   ret_code = cugraph_graph_create_mg(handle,
                                      &properties,
                                      NULL,
@@ -207,7 +206,6 @@ extern "C" int create_mg_test_graph(const cugraph_resource_handle_t* handle,
                                      p_graph,
                                      ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
-  printf("\n completed 'mg_test_utils'\n");
 
   cugraph_type_erased_device_array_view_free(wgt_view);
   cugraph_type_erased_device_array_view_free(dst_view);

From e83722e0720654ae8b390b92aae2a7863e4f0a66 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 10 Jan 2025 16:36:39 -0800
Subject: [PATCH 34/60] fix renumbering bug

---
 cpp/src/c_api/random_walks.cpp | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp
index c38db96baf..71fa7fdeb9 100644
--- a/cpp/src/c_api/random_walks.cpp
+++ b/cpp/src/c_api/random_walks.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -340,8 +340,13 @@ struct biased_random_walks_functor : public cugraph::c_api::abstract_functor {
       //
       // Need to unrenumber the vertices in the resulting paths
       //
-      cugraph::unrenumber_local_int_vertices<vertex_t>(
-        handle_, paths.data(), paths.size(), number_map->data(), 0, paths.size() - 1, false);
+      cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
+        handle_,
+        paths.data(),
+        paths.size(),
+        number_map->data(),
+        graph_view.vertex_partition_range_lasts(),
+        false);
 
       result_ = new cugraph::c_api::cugraph_random_walk_result_t{
         false,
@@ -451,8 +456,13 @@ struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor {
       //
       // Need to unrenumber the vertices in the resulting paths
       //
-      cugraph::unrenumber_local_int_vertices<vertex_t>(
-        handle_, paths.data(), paths.size(), number_map->data(), 0, paths.size(), false);
+      cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
+        handle_,
+        paths.data(),
+        paths.size(),
+        number_map->data(),
+        graph_view.vertex_partition_range_lasts(),
+        false);
 
       result_ = new cugraph::c_api::cugraph_random_walk_result_t{
         false,

From 2c1a034ae1f2c7ed77fbbbb54fa27897a2342e30 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 10 Jan 2025 16:39:01 -0800
Subject: [PATCH 35/60] enable MG tests and fix bugs

---
 cpp/tests/c_api/mg_random_walks_test.c | 111 +++++++++++++++----------
 1 file changed, 66 insertions(+), 45 deletions(-)

diff --git a/cpp/tests/c_api/mg_random_walks_test.c b/cpp/tests/c_api/mg_random_walks_test.c
index 525d340148..d5a02ba242 100644
--- a/cpp/tests/c_api/mg_random_walks_test.c
+++ b/cpp/tests/c_api/mg_random_walks_test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -128,6 +128,7 @@ int generic_uniform_random_walks_test(const cugraph_resource_handle_t* handle,
                       "uniform_random_walks found no edge when an edge exists");
         }
       } else {
+        //printf("\na_ = %f, b_ = %f\n", M[h_result_verts[src_index]][h_result_verts[dst_index]], h_result_wgts[i * max_depth + j]);
         TEST_ASSERT(test_ret_value,
                     M[h_result_verts[src_index]][h_result_verts[dst_index]] ==
                       h_result_wgts[i * max_depth + j],
@@ -186,14 +187,12 @@ int generic_biased_random_walks_test(const cugraph_resource_handle_t* handle,
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
   TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
 
-  ret_code = cugraph_biased_random_walks(
-    handle, rng_state, graph, d_start_view, FALSE, &result, &ret_error);
+  ret_code =
+    cugraph_biased_random_walks(
+      handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
 
-#if 1
-  TEST_ASSERT(test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_random_walks should have failed")
-#else
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
-  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_random_walks failed.");
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_random_walks failed.");
 
   cugraph_type_erased_device_array_view_t* verts;
   cugraph_type_erased_device_array_view_t* wgts;
@@ -205,10 +204,10 @@ int generic_biased_random_walks_test(const cugraph_resource_handle_t* handle,
   size_t wgts_size  = cugraph_type_erased_device_array_view_size(wgts);
 
   vertex_t h_result_verts[verts_size];
-  vertex_t h_result_wgts[wgts_size];
+  weight_t h_result_wgts[wgts_size];
 
-  ret_code =
-    cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_verts, verts, &ret_error);
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_result_verts, verts, &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
 
   ret_code = cugraph_type_erased_device_array_view_copy_to_host(
@@ -228,24 +227,36 @@ int generic_biased_random_walks_test(const cugraph_resource_handle_t* handle,
     M[h_src[i]][h_dst[i]] = h_wgt[i];
 
   TEST_ASSERT(test_ret_value,
-              cugraph_random_walk_result_get_max_path_length() == max_depth,
+              cugraph_random_walk_result_get_max_path_length(result) == max_depth,
               "path length does not match");
 
   for (int i = 0; (i < num_starts) && (test_ret_value == 0); ++i) {
-    TEST_ASSERT(test_ret_value,
-                M[h_start[i]][h_result_verts[i * (max_depth + 1)]] == h_result_wgts[i * max_depth],
-                "biased_random_walks got edge that doesn't exist");
-    for (size_t j = 1; j < cugraph_random_walk_result_get_max_path_length(); ++j)
-      TEST_ASSERT(
-        test_ret_value,
-        M[h_start[i * (max_depth + 1) + j - 1]][h_result_verts[i * (max_depth + 1) + j]] ==
-          h_result_wgts[i * max_depth + j - 1],
-        "biased_random_walks got edge that doesn't exist");
+    TEST_ASSERT(
+      test_ret_value, h_start[i] == h_result_verts[i * (max_depth + 1)], "start of path not found");
+    for (size_t j = 0; j < max_depth; ++j) {
+      int src_index = i * (max_depth + 1) + j;
+      int dst_index = src_index + 1;
+      if (h_result_verts[dst_index] < 0) {
+        if (h_result_verts[src_index] >= 0) {
+          int departing_count = 0;
+          for (int k = 0; k < num_vertices; ++k) {
+            // edges with a weight/bias value of 0 or less are not counted as departing edges.
+            if (M[h_result_verts[src_index]][k] > 0) departing_count++;
+          }
+          TEST_ASSERT(test_ret_value,
+                      departing_count == 0,
+                      "biased_random_walks found no edge when an edge exists");
+        }
+      } else {
+        TEST_ASSERT(test_ret_value,
+                    M[h_result_verts[src_index]][h_result_verts[dst_index]] ==
+                      h_result_wgts[i * max_depth + j],
+                    "biased_random_walks got edge that doesn't exist");
+      }
+    }
   }
 
   cugraph_random_walk_result_free(result);
-#endif
-
   cugraph_graph_free(graph);
   cugraph_error_free(ret_error);
 
@@ -299,12 +310,9 @@ int generic_node2vec_random_walks_test(const cugraph_resource_handle_t* handle,
 
   ret_code =
     cugraph_node2vec_random_walks(
-      handle, rng_state, graph, d_start_view, FALSE, p, q, &result, &ret_error);
+      handle, rng_state, graph, d_start_view, max_depth, p, q, &result, &ret_error);
+
 
-#if 1
-  TEST_ASSERT(
-    test_ret_value, ret_code != CUGRAPH_SUCCESS, "node2vec_random_walks should have failed")
-#else
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "node2vec_random_walks failed.");
 
@@ -318,10 +326,10 @@ int generic_node2vec_random_walks_test(const cugraph_resource_handle_t* handle,
   size_t wgts_size  = cugraph_type_erased_device_array_view_size(wgts);
 
   vertex_t h_result_verts[verts_size];
-  vertex_t h_result_wgts[wgts_size];
+  weight_t h_result_wgts[wgts_size];
 
   ret_code =
-    cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_verts, verts, &ret_error);
+    cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_result_verts, verts, &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
 
   ret_code = cugraph_type_erased_device_array_view_copy_to_host(
@@ -341,24 +349,35 @@ int generic_node2vec_random_walks_test(const cugraph_resource_handle_t* handle,
     M[h_src[i]][h_dst[i]] = h_wgt[i];
 
   TEST_ASSERT(test_ret_value,
-              cugraph_random_walk_result_get_max_path_length() == max_depth,
+              cugraph_random_walk_result_get_max_path_length(result) == max_depth,
               "path length does not match");
 
   for (int i = 0; (i < num_starts) && (test_ret_value == 0); ++i) {
-    TEST_ASSERT(test_ret_value,
-                M[h_start[i]][h_result_verts[i * (max_depth + 1)]] == h_result_wgts[i * max_depth],
-                "node2vec_random_walks got edge that doesn't exist");
-    for (size_t j = 1; j < cugraph_random_walk_result_get_max_path_length(); ++j)
-      TEST_ASSERT(
-        test_ret_value,
-        M[h_start[i * (max_depth + 1) + j - 1]][h_result_verts[i * (max_depth + 1) + j]] ==
-          h_result_wgts[i * max_depth + j - 1],
-        "node2vec_random_walks got edge that doesn't exist");
+    TEST_ASSERT(
+      test_ret_value, h_start[i] == h_result_verts[i * (max_depth + 1)], "start of path not found");
+    for (size_t j = 0; j < max_depth; ++j) {
+      int src_index = i * (max_depth + 1) + j;
+      int dst_index = src_index + 1;
+      if (h_result_verts[dst_index] < 0) {
+        if (h_result_verts[src_index] >= 0) {
+          int departing_count = 0;
+          for (int k = 0; k < num_vertices; ++k) {
+            if (M[h_result_verts[src_index]][k] >= 0) departing_count++;
+          }
+          TEST_ASSERT(test_ret_value,
+                      departing_count == 0,
+                      "node2vec_random_walks found no edge when an edge exists");
+        }
+      } else {
+        TEST_ASSERT(test_ret_value,
+                    M[h_result_verts[src_index]][h_result_verts[dst_index]] ==
+                      h_result_wgts[i * max_depth + j],
+                    "node2vec_random_walks got edge that doesn't exist");
+      }
+    }
   }
 
   cugraph_random_walk_result_free(result);
-#endif
-
   cugraph_graph_free(graph);
   cugraph_error_free(ret_error);
 
@@ -386,14 +405,15 @@ int test_biased_random_walks(const cugraph_resource_handle_t* handle)
   size_t num_edges    = 8;
   size_t num_vertices = 6;
   size_t num_starts   = 2;
+  size_t max_depth    = 3;
 
   vertex_t src[]   = {0, 1, 1, 2, 2, 2, 3, 4};
   vertex_t dst[]   = {1, 3, 4, 0, 1, 3, 5, 5};
-  weight_t wgt[]   = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+  weight_t wgt[]   = {0, 1, 2, 3, 4, 5, 6, 7};
   vertex_t start[] = {2, 2};
 
   return generic_biased_random_walks_test(
-    handle, src, dst, wgt, num_vertices, num_edges, start, num_starts, FALSE, FALSE);
+    handle, src, dst, wgt, num_vertices, num_edges, start, num_starts, max_depth, FALSE);
 }
 
 int test_node2vec_random_walks(const cugraph_resource_handle_t* handle)
@@ -401,17 +421,18 @@ int test_node2vec_random_walks(const cugraph_resource_handle_t* handle)
   size_t num_edges    = 8;
   size_t num_vertices = 6;
   size_t num_starts   = 2;
+  size_t max_depth    = 3;
 
   vertex_t src[]   = {0, 1, 1, 2, 2, 2, 3, 4};
   vertex_t dst[]   = {1, 3, 4, 0, 1, 3, 5, 5};
-  weight_t wgt[]   = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+  weight_t wgt[]   = {0, 1, 2, 3, 4, 5, 6, 7};
   vertex_t start[] = {2, 2};
 
   weight_t p = 5;
   weight_t q = 8;
 
   return generic_node2vec_random_walks_test(
-    handle, src, dst, wgt, num_vertices, num_edges, start, num_starts, p, q, FALSE, FALSE);
+    handle, src, dst, wgt, num_vertices, num_edges, start, num_starts, p, q, max_depth, FALSE);
 }
 
 int main(int argc, char** argv)

From d521dfc8a40f229dbad50ac09b4efe470459149e Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 10 Jan 2025 16:41:56 -0800
Subject: [PATCH 36/60] fix style

---
 cpp/include/cugraph_c/sampling_algorithms.h   |  2 +-
 cpp/tests/c_api/mg_random_walks_test.c        | 23 ++++++++-----------
 cpp/tests/c_api/mg_test_utils.cpp             | 10 ++++----
 cpp/tests/c_api/sg_random_walks_test.c        | 12 +++++-----
 python/cugraph/cugraph/__init__.py            |  2 +-
 python/cugraph/cugraph/dask/__init__.py       |  2 +-
 .../dask/sampling/biased_random_walks.py      |  9 +++-----
 .../dask/sampling/node2vec_random_walks.py    | 22 +++++++-----------
 .../cugraph/dask/sampling/random_walks.py     |  8 +++----
 .../dask/sampling/uniform_random_walks.py     |  9 +++-----
 python/cugraph/cugraph/sampling/__init__.py   |  2 +-
 .../cugraph/sampling/biased_random_walks.py   |  6 ++---
 python/cugraph/cugraph/sampling/node2vec.py   |  8 +++----
 .../cugraph/sampling/node2vec_random_walks.py | 11 +++++----
 .../cugraph/cugraph/sampling/random_walks.py  | 22 ++++++++----------
 .../cugraph/sampling/uniform_random_walks.py  |  6 ++---
 .../pylibcugraph/pylibcugraph/CMakeLists.txt  |  2 +-
 python/pylibcugraph/pylibcugraph/__init__.py  |  2 +-
 .../pylibcugraph/_cugraph_c/algorithms.pxd    |  4 ++--
 .../pylibcugraph/biased_random_walks.pyx      |  8 +++----
 .../pylibcugraph/node2vec_random_walks.pyx    |  6 ++---
 .../pylibcugraph/uniform_random_walks.pyx     |  8 +++----
 22 files changed, 84 insertions(+), 100 deletions(-)

diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h
index 8c0040fff1..05639224aa 100644
--- a/cpp/include/cugraph_c/sampling_algorithms.h
+++ b/cpp/include/cugraph_c/sampling_algorithms.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/cpp/tests/c_api/mg_random_walks_test.c b/cpp/tests/c_api/mg_random_walks_test.c
index d5a02ba242..c62725029a 100644
--- a/cpp/tests/c_api/mg_random_walks_test.c
+++ b/cpp/tests/c_api/mg_random_walks_test.c
@@ -68,9 +68,8 @@ int generic_uniform_random_walks_test(const cugraph_resource_handle_t* handle,
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
   TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
 
-  ret_code =
-    cugraph_uniform_random_walks(
-      handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
+  ret_code = cugraph_uniform_random_walks(
+    handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_random_walks failed.");
@@ -128,7 +127,8 @@ int generic_uniform_random_walks_test(const cugraph_resource_handle_t* handle,
                       "uniform_random_walks found no edge when an edge exists");
         }
       } else {
-        //printf("\na_ = %f, b_ = %f\n", M[h_result_verts[src_index]][h_result_verts[dst_index]], h_result_wgts[i * max_depth + j]);
+        // printf("\na_ = %f, b_ = %f\n", M[h_result_verts[src_index]][h_result_verts[dst_index]],
+        // h_result_wgts[i * max_depth + j]);
         TEST_ASSERT(test_ret_value,
                     M[h_result_verts[src_index]][h_result_verts[dst_index]] ==
                       h_result_wgts[i * max_depth + j],
@@ -187,9 +187,8 @@ int generic_biased_random_walks_test(const cugraph_resource_handle_t* handle,
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
   TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
 
-  ret_code =
-    cugraph_biased_random_walks(
-      handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
+  ret_code = cugraph_biased_random_walks(
+    handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_random_walks failed.");
@@ -308,10 +307,8 @@ int generic_node2vec_random_walks_test(const cugraph_resource_handle_t* handle,
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
   TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
 
-  ret_code =
-    cugraph_node2vec_random_walks(
-      handle, rng_state, graph, d_start_view, max_depth, p, q, &result, &ret_error);
-
+  ret_code = cugraph_node2vec_random_walks(
+    handle, rng_state, graph, d_start_view, max_depth, p, q, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "node2vec_random_walks failed.");
@@ -328,8 +325,8 @@ int generic_node2vec_random_walks_test(const cugraph_resource_handle_t* handle,
   vertex_t h_result_verts[verts_size];
   weight_t h_result_wgts[wgts_size];
 
-  ret_code =
-    cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_result_verts, verts, &ret_error);
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_result_verts, verts, &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
 
   ret_code = cugraph_type_erased_device_array_view_copy_to_host(
diff --git a/cpp/tests/c_api/mg_test_utils.cpp b/cpp/tests/c_api/mg_test_utils.cpp
index 4767ac9853..3bb3970164 100644
--- a/cpp/tests/c_api/mg_test_utils.cpp
+++ b/cpp/tests/c_api/mg_test_utils.cpp
@@ -198,7 +198,7 @@ extern "C" int create_mg_test_graph(const cugraph_resource_handle_t* handle,
                                      NULL,
                                      NULL,
                                      store_transposed,
-                                     size_t{1}, // num_arrays
+                                     size_t{1},  // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
@@ -294,7 +294,7 @@ extern "C" int create_mg_test_graph_double(const cugraph_resource_handle_t* hand
                                      NULL,
                                      NULL,
                                      store_transposed,
-                                     size_t{1}, // num_arrays
+                                     size_t{1},  // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
@@ -384,7 +384,7 @@ extern "C" int create_mg_test_graph_with_edge_ids(const cugraph_resource_handle_
                                      &idx_view,
                                      NULL,
                                      store_transposed,
-                                     size_t{1}, // num_arrays
+                                     size_t{1},  // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
@@ -509,7 +509,7 @@ extern "C" int create_mg_test_graph_with_properties(const cugraph_resource_handl
                                      &idx_view,
                                      &type_view,
                                      store_transposed,
-                                     size_t{1}, // num_arrays
+                                     size_t{1},  // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
@@ -640,7 +640,7 @@ int create_mg_test_graph_new(const cugraph_resource_handle_t* handle,
                                      &edge_id_view,
                                      &edge_type_view,
                                      store_transposed,
-                                     size_t{1}, // num_arrays
+                                     size_t{1},  // num_arrays
                                      FALSE,
                                      FALSE,
                                      FALSE,
diff --git a/cpp/tests/c_api/sg_random_walks_test.c b/cpp/tests/c_api/sg_random_walks_test.c
index 8d7328cba9..a6aeeff58d 100644
--- a/cpp/tests/c_api/sg_random_walks_test.c
+++ b/cpp/tests/c_api/sg_random_walks_test.c
@@ -70,8 +70,8 @@ int generic_uniform_random_walks_test(vertex_t* h_src,
   ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
 
-  ret_code =
-    cugraph_uniform_random_walks(handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
+  ret_code = cugraph_uniform_random_walks(
+    handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_random_walks failed.");
@@ -192,13 +192,13 @@ int generic_biased_random_walks_test(vertex_t* h_src,
     handle, d_start_view, (byte_t*)h_start, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
-  
+
   cugraph_rng_state_t* rng_state;
   ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
-  
-  ret_code =
-    cugraph_biased_random_walks(handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
+
+  ret_code = cugraph_biased_random_walks(
+    handle, rng_state, graph, d_start_view, max_depth, &result, &ret_error);
 
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_random_walks failed.");
diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py
index 4f2c47cf41..8aeba6a5d3 100644
--- a/python/cugraph/cugraph/__init__.py
+++ b/python/cugraph/cugraph/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/dask/__init__.py b/python/cugraph/cugraph/dask/__init__.py
index 3b63e0ebff..617eb25a2b 100644
--- a/python/cugraph/cugraph/dask/__init__.py
+++ b/python/cugraph/cugraph/dask/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/dask/sampling/biased_random_walks.py b/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
index a4dab3578a..965d119ed7 100644
--- a/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -60,10 +60,7 @@ def _call_plc_biased_random_walks(sID, mg_graph_x, st_x, max_depth, random_state
 
 
 def biased_random_walks(
-    input_graph,
-    start_vertices=None,
-    max_depth=None,
-    random_state=None
+    input_graph, start_vertices=None, max_depth=None, random_state=None
 ):
     """
     compute random walks under the biased sampling framework for each nodes in
@@ -82,7 +79,7 @@ def biased_random_walks(
 
     max_depth : int
         The maximum depth of the random walks
-    
+
     random_state: int, optional
         Random seed to use when making sampling calls.
 
diff --git a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
index 2ba3a2d238..3b800749b2 100644
--- a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -48,7 +48,9 @@ def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
     return cudf.Series(cp_paths)
 
 
-def _call_plc_node2vec_random_walks(sID, mg_graph_x, st_x, max_depth, p, q, random_state):
+def _call_plc_node2vec_random_walks(
+    sID, mg_graph_x, st_x, max_depth, p, q, random_state
+):
 
     return pylibcugraph_node2vec_random_walks(
         resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
@@ -57,18 +59,13 @@ def _call_plc_node2vec_random_walks(sID, mg_graph_x, st_x, max_depth, p, q, rand
         max_depth=max_depth,
         p=p,
         q=q,
-        random_state=random_state
+        random_state=random_state,
     )
 
 
 # FIXME: Add type anotation
 def node2vec_random_walks(
-    input_graph,
-    start_vertices=None,
-    max_depth=None,
-    p=1.0,
-    q=1.0,
-    random_state=None
+    input_graph, start_vertices=None, max_depth=None, p=1.0, q=1.0, random_state=None
 ):
     """
     compute random walks under the node2vec sampling framework for each nodes in
@@ -101,7 +98,7 @@ def node2vec_random_walks(
         is likelier to visit nodes closer to the outgoing node. If q < 1, the
         random walk is likelier to visit nodes further from the outgoing node.
         A positive float.
-    
+
     random_state: int, optional
         Random seed to use when making sampling calls.
 
@@ -113,7 +110,7 @@ def node2vec_random_walks(
     edge_weight_paths: dask_cudf.Series
         Series containing the edge weights of edges represented by the
         returned vertex_paths
-    
+
     and
 
     max_path_length : int
@@ -130,8 +127,6 @@ def node2vec_random_walks(
     if (not isinstance(q, float)) or (q <= 0.0):
         raise ValueError(f"'q' must be a positive float, got: {q}")
 
-
-
     if isinstance(start_vertices, int):
         start_vertices = [start_vertices]
 
@@ -201,4 +196,3 @@ def node2vec_random_walks(
     )
 
     return ddf_vertex_paths, ddf_edge_wgt_paths, max_depth
-    
diff --git a/python/cugraph/cugraph/dask/sampling/random_walks.py b/python/cugraph/cugraph/dask/sampling/random_walks.py
index 07dfe93b16..80c241a7fc 100644
--- a/python/cugraph/cugraph/dask/sampling/random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -109,9 +109,9 @@ def random_walks(
     """
 
     warning_msg = (
-            "random_walks is deprecated and will be removed "
-            "in the next release in favor of uniform_random_walks"
-        )
+        "random_walks is deprecated and will be removed "
+        "in the next release in favor of uniform_random_walks"
+    )
     warnings.warn(warning_msg, FutureWarning)
 
     client = default_client()
diff --git a/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py b/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
index ba571a03e8..ee5152bc8a 100644
--- a/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -60,10 +60,7 @@ def _call_plc_uniform_random_walks(sID, mg_graph_x, st_x, max_depth, random_stat
 
 
 def uniform_random_walks(
-    input_graph,
-    start_vertices=None,
-    max_depth=None,
-    random_state=None
+    input_graph, start_vertices=None, max_depth=None, random_state=None
 ):
     """
     compute random walks under the uniform sampling framework for each nodes in
@@ -82,7 +79,7 @@ def uniform_random_walks(
 
     max_depth : int
         The maximum depth of the random walks
-    
+
     random_state: int, optional
         Random seed to use when making sampling calls.
 
diff --git a/python/cugraph/cugraph/sampling/__init__.py b/python/cugraph/cugraph/sampling/__init__.py
index 495483d135..88439d779e 100644
--- a/python/cugraph/cugraph/sampling/__init__.py
+++ b/python/cugraph/cugraph/sampling/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/sampling/biased_random_walks.py b/python/cugraph/cugraph/sampling/biased_random_walks.py
index 65b6524197..9c70f21f63 100644
--- a/python/cugraph/cugraph/sampling/biased_random_walks.py
+++ b/python/cugraph/cugraph/sampling/biased_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -50,7 +50,7 @@ def biased_random_walks(
         The max depth is relative to the number of edges hence the vertex_paths size
         is max_depth + 1. For instance, a 'max_depth' of 2 with only one seed will
         result in a vertex_path of size 3.
-    
+
     random_state: int, optional
         Random seed to use when making sampling calls.
 
@@ -104,7 +104,7 @@ def biased_random_walks(
         input_graph=G._plc_graph,
         start_vertices=start_vertices,
         max_length=max_depth,
-        random_state=random_state
+        random_state=random_state,
     )
 
     vertex_paths = cudf.Series(vertex_paths)
diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py
index 2a5c0f9389..f7a1d3aa64 100644
--- a/python/cugraph/cugraph/sampling/node2vec.py
+++ b/python/cugraph/cugraph/sampling/node2vec.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -113,9 +113,9 @@ def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0)
 
     """
     warning_msg = (
-            "node2vec is deprecated and will be removed "
-            "in the next release in favor of node2vec_random_walks"
-        )
+        "node2vec is deprecated and will be removed "
+        "in the next release in favor of node2vec_random_walks"
+    )
     warnings.warn(warning_msg, FutureWarning)
 
     if (not isinstance(max_depth, int)) or (max_depth < 1):
diff --git a/python/cugraph/cugraph/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
index 3f8a6bdb66..1f93f3c61d 100644
--- a/python/cugraph/cugraph/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -41,7 +41,9 @@ def ensure_valid_dtype(input_graph, start_vertices):
     return start_vertices
 
 
-def node2vec_random_walks(G, start_vertices, max_depth=1, p=1.0, q=1.0, random_state=None):
+def node2vec_random_walks(
+    G, start_vertices, max_depth=1, p=1.0, q=1.0, random_state=None
+):
     """
     Computes random walks for each node in 'start_vertices', under the
     node2vec sampling framework.
@@ -80,7 +82,7 @@ def node2vec_random_walks(G, start_vertices, max_depth=1, p=1.0, q=1.0, random_s
         is likelier to visit nodes closer to the outgoing node. If q < 1, the
         random walk is likelier to visit nodes further from the outgoing node.
         A positive float.
-    
+
     random_state: int, optional
         Random seed to use when making sampling calls.
 
@@ -117,7 +119,6 @@ def node2vec_random_walks(G, start_vertices, max_depth=1, p=1.0, q=1.0, random_s
     if (not isinstance(q, float)) or (q <= 0.0):
         raise ValueError(f"'q' must be a positive float, got: {q}")
 
-
     if isinstance(start_vertices, int):
         start_vertices = [start_vertices]
 
@@ -147,7 +148,7 @@ def node2vec_random_walks(G, start_vertices, max_depth=1, p=1.0, q=1.0, random_s
         max_depth=max_depth,
         p=p,
         q=q,
-        random_state=random_state
+        random_state=random_state,
     )
     vertex_set = cudf.Series(vertex_set)
     edge_set = cudf.Series(edge_set)
diff --git a/python/cugraph/cugraph/sampling/random_walks.py b/python/cugraph/cugraph/sampling/random_walks.py
index eaf0581724..09e2e4d2ab 100644
--- a/python/cugraph/cugraph/sampling/random_walks.py
+++ b/python/cugraph/cugraph/sampling/random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -97,7 +97,7 @@ def random_walks(
     use_padding : bool, optional (default=False)
         If True, padded paths are returned else coalesced paths are returned.
 
-        Deprecated: only padded paths will be returned in the results 
+        Deprecated: only padded paths will be returned in the results
 
     legacy_result_type : bool, optional (default=True)
         If True, will return a tuple of vertex_paths, edge_weight_paths and
@@ -137,9 +137,9 @@ def random_walks(
     """
 
     warning_msg = (
-            "random_walks is deprecated and will be removed "
-            "in the next release in favor of uniform_random_walks"
-        )
+        "random_walks is deprecated and will be removed "
+        "in the next release in favor of uniform_random_walks"
+    )
     warnings.warn(warning_msg, FutureWarning)
 
     # FIXME: Coalesced path results have been deprecated and should no longer be
@@ -151,7 +151,7 @@ def random_walks(
     # returning results paths proprtional to the number of edges. Furthermore,
     # Coalesced path results should also be removed in favor of always returning
     # padded results. The flags 'legacy_result_type' and 'use_padding" should be
-    # removed. 
+    # removed.
 
     if legacy_result_type or use_padding is False:
         warning_msg = (
@@ -160,7 +160,7 @@ def random_walks(
             "only padded paths will be returned instead"
         )
         warnings.warn(warning_msg, PendingDeprecationWarning)
-    
+
     if random_walks_type != "uniform":
         warning_msg = (
             "random_walks_type is deprecated and will be removed "
@@ -169,7 +169,6 @@ def random_walks(
         )
     warnings.warn(warning_msg, FutureWarning)
 
-
     if max_depth is None:
         raise TypeError("must specify a 'max_depth'")
 
@@ -180,7 +179,7 @@ def random_walks(
     # Consider a different return type if Nx types are passed in.
     # The new API for random walk should instead always return the triple
     # (vertex_paths, edge_wgt_paths, max_path_length)
-    
+
     G, _ = ensure_cugraph_obj_for_nx(G)
 
     if isinstance(start_vertices, int):
@@ -300,9 +299,8 @@ def rw_path(
     """
 
     warning_msg = (
-            "This method is deprecated in favor of always returning "
-            "padded results."
-        )
+        "This method is deprecated in favor of always returning " "padded results."
+    )
 
     warnings.warn(warning_msg, PendingDeprecationWarning)
 
diff --git a/python/cugraph/cugraph/sampling/uniform_random_walks.py b/python/cugraph/cugraph/sampling/uniform_random_walks.py
index 160bdc9c9f..ba4af23227 100644
--- a/python/cugraph/cugraph/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/sampling/uniform_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -50,7 +50,7 @@ def uniform_random_walks(
         The max depth is relative to the number of edges hence the vertex_paths size
         is max_depth + 1. For instance, a 'max_depth' of 2 with only one seed will
         result in a vertex_path of size 3.
-    
+
     random_state: int, optional
         Random seed to use when making sampling calls.
 
@@ -104,7 +104,7 @@ def uniform_random_walks(
         input_graph=G._plc_graph,
         start_vertices=start_vertices,
         max_length=max_depth,
-        random_state=random_state
+        random_state=random_state,
     )
 
     vertex_paths = cudf.Series(vertex_paths)
diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
index d453c62001..3da38aa630 100644
--- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt
+++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py
index 92c6459686..a532adf721 100644
--- a/python/pylibcugraph/pylibcugraph/__init__.py
+++ b/python/pylibcugraph/pylibcugraph/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
index e043b7672b..604ef77d6a 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -395,4 +395,4 @@ cdef extern from "cugraph_c/algorithms.h":
             double q,
             cugraph_random_walk_result_t** result,
             cugraph_error_t** error
-        )
\ No newline at end of file
+        )
diff --git a/python/pylibcugraph/pylibcugraph/biased_random_walks.pyx b/python/pylibcugraph/pylibcugraph/biased_random_walks.pyx
index 2f37de7e93..c6847af424 100644
--- a/python/pylibcugraph/pylibcugraph/biased_random_walks.pyx
+++ b/python/pylibcugraph/pylibcugraph/biased_random_walks.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -82,7 +82,7 @@ def biased_random_walks(ResourceHandle resource_handle,
 
     max_length: size_t
         The maximum depth of the biased random walks
-    
+
     random_state: int (Optional)
         Random state to use when generating samples.  Optional argument,
         defaults to a hash of process id, time, and hostname.
@@ -114,9 +114,9 @@ def biased_random_walks(ResourceHandle resource_handle,
             <void*>cai_start_ptr,
             len(start_vertices),
             get_c_type_from_numpy_type(start_vertices.dtype))
-    
+
     cg_rng_state = CuGraphRandomState(resource_handle, random_state)
-    
+
     cdef cugraph_rng_state_t* rng_state_ptr = \
         cg_rng_state.rng_state_ptr
 
diff --git a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
index 59e7bd96c4..a8ce23a01a 100644
--- a/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
+++ b/python/pylibcugraph/pylibcugraph/node2vec_random_walks.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -94,7 +94,7 @@ def node2vec_random_walks(ResourceHandle resource_handle,
         further from the outgoing node. If q > 1, the random walk is likelier to
         visit nodes closer to the outgoing node. If q < 1, the random walk is
         likelier to visit nodes further from the outgoing node.
-    
+
     random_state: int (Optional)
         Random state to use when generating samples.  Optional argument,
         defaults to a hash of process id, time, and hostname.
@@ -152,7 +152,7 @@ def node2vec_random_walks(ResourceHandle resource_handle,
             get_c_type_from_numpy_type(seed_array.dtype))
 
     cg_rng_state = CuGraphRandomState(resource_handle, random_state)
-    
+
     cdef cugraph_rng_state_t* rng_state_ptr = \
         cg_rng_state.rng_state_ptr
 
diff --git a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx
index 95379254e4..f3889264c0 100644
--- a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx
+++ b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -82,7 +82,7 @@ def uniform_random_walks(ResourceHandle resource_handle,
 
     max_length: size_t
         The maximum depth of the uniform random walks
-    
+
     random_state: int (Optional)
         Random state to use when generating samples.  Optional argument,
         defaults to a hash of process id, time, and hostname.
@@ -114,9 +114,9 @@ def uniform_random_walks(ResourceHandle resource_handle,
             <void*>cai_start_ptr,
             len(start_vertices),
             get_c_type_from_numpy_type(start_vertices.dtype))
-    
+
     cg_rng_state = CuGraphRandomState(resource_handle, random_state)
-    
+
     cdef cugraph_rng_state_t* rng_state_ptr = \
         cg_rng_state.rng_state_ptr
 

From 6418b9695272b3029ea8d05da03229f8d48007af Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 10 Jan 2025 16:43:09 -0800
Subject: [PATCH 37/60] remove unused import

---
 python/cugraph/cugraph/sampling/biased_random_walks.py  | 1 -
 python/cugraph/cugraph/sampling/uniform_random_walks.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/python/cugraph/cugraph/sampling/biased_random_walks.py b/python/cugraph/cugraph/sampling/biased_random_walks.py
index 9c70f21f63..41340cba8a 100644
--- a/python/cugraph/cugraph/sampling/biased_random_walks.py
+++ b/python/cugraph/cugraph/sampling/biased_random_walks.py
@@ -12,7 +12,6 @@
 # limitations under the License.
 
 import cudf
-import cupy as cp
 from pylibcugraph import ResourceHandle
 from pylibcugraph import (
     biased_random_walks as pylibcugraph_biased_random_walks,
diff --git a/python/cugraph/cugraph/sampling/uniform_random_walks.py b/python/cugraph/cugraph/sampling/uniform_random_walks.py
index ba4af23227..99d6695824 100644
--- a/python/cugraph/cugraph/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/sampling/uniform_random_walks.py
@@ -12,7 +12,6 @@
 # limitations under the License.
 
 import cudf
-import cupy as cp
 from pylibcugraph import ResourceHandle
 from pylibcugraph import (
     uniform_random_walks as pylibcugraph_uniform_random_walks,

From af3b31f62c4bd31a170fcc17294f6ae808201059 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Sun, 12 Jan 2025 17:40:25 -0800
Subject: [PATCH 38/60] add type annotations

---
 .../dask/sampling/biased_random_walks.py      | 30 ++++++++++++---
 .../dask/sampling/node2vec_random_walks.py    | 37 ++++++++++++++-----
 .../dask/sampling/uniform_random_walks.py     | 24 ++++++++++--
 3 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/python/cugraph/cugraph/dask/sampling/biased_random_walks.py b/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
index 965d119ed7..9a70ab658c 100644
--- a/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
@@ -15,6 +15,7 @@
 from dask.distributed import wait, default_client
 import dask_cudf
 import cudf
+import cupy as cp
 import operator as op
 from cugraph.dask.common.part_utils import (
     persist_dask_df_equal_parts_per_worker,
@@ -27,9 +28,14 @@
 )
 
 from cugraph.dask.comms import comms as Comms
+from typing import Tuple, Union
 
 
-def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
+def convert_to_cudf(
+    cp_paths: cp.ndarray,
+    number_map=None,
+    is_vertex_paths: bool = False
+    ) -> cudf.Series:
     """
     Creates cudf Series from cupy arrays from pylibcugraph wrapper
     """
@@ -48,7 +54,13 @@ def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
     return cudf.Series(cp_paths)
 
 
-def _call_plc_biased_random_walks(sID, mg_graph_x, st_x, max_depth, random_state):
+def _call_plc_biased_random_walks(
+    sID: bytes,
+    mg_graph_x,
+    st_x: cudf.Series,
+    max_depth: int,
+    random_state: int
+    ) -> Tuple[cp.ndarray, cp.ndarray]:
 
     return pylibcugraph_biased_random_walks(
         resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
@@ -60,8 +72,12 @@ def _call_plc_biased_random_walks(sID, mg_graph_x, st_x, max_depth, random_state
 
 
 def biased_random_walks(
-    input_graph, start_vertices=None, max_depth=None, random_state=None
-):
+    input_graph,
+    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series
+    ] = None,
+    max_depth: int = 1,
+    random_state: int = None
+) -> Tuple[Union[dask_cudf.Series, dask_cudf.DataFrame], dask_cudf.Series, int]:
     """
     compute random walks under the biased sampling framework for each nodes in
     'start_vertices' and returns a padded result along with the maximum path length.
@@ -77,8 +93,10 @@ def biased_random_walks(
         the random walks. In case of multi-column vertices it should be
         a cudf.DataFrame
 
-    max_depth : int
-        The maximum depth of the random walks
+    max_depth: int
+        The maximum depth of the random walks. If not specified, the maximum
+        depth is set to 1.
+        Must be a positive integer
 
     random_state: int, optional
         Random seed to use when making sampling calls.
diff --git a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
index 3b800749b2..d3e84c4c42 100644
--- a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
@@ -15,21 +15,26 @@
 from dask.distributed import wait, default_client
 import dask_cudf
 import cudf
+import cupy as cp
 import operator as op
 from cugraph.dask.common.part_utils import (
     persist_dask_df_equal_parts_per_worker,
 )
 
-from pylibcugraph import ResourceHandle
-
 from pylibcugraph import (
+    ResourceHandle,
     node2vec_random_walks as pylibcugraph_node2vec_random_walks,
 )
 
 from cugraph.dask.comms import comms as Comms
+from typing import Tuple, Union
 
 
-def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
+def convert_to_cudf(
+    cp_paths: cp.ndarray,
+    number_map=None,
+    is_vertex_paths: bool = False
+    ) -> cudf.Series:
     """
     Creates cudf Series from cupy arrays from pylibcugraph wrapper
     """
@@ -49,8 +54,14 @@ def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
 
 
 def _call_plc_node2vec_random_walks(
-    sID, mg_graph_x, st_x, max_depth, p, q, random_state
-):
+    sID: bytes,
+    mg_graph_x,
+    st_x: cudf.Series,
+    max_depth: int,
+    p: float, 
+    q: float,
+    random_state: int
+) -> Tuple[cp.ndarray, cp.ndarray]:
 
     return pylibcugraph_node2vec_random_walks(
         resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
@@ -63,10 +74,15 @@ def _call_plc_node2vec_random_walks(
     )
 
 
-# FIXME: Add type anotation
 def node2vec_random_walks(
-    input_graph, start_vertices=None, max_depth=None, p=1.0, q=1.0, random_state=None
-):
+    input_graph,
+    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series
+    ] = None,
+    max_depth: int = 1,
+    p: float = 1.0,
+    q: float = 1.0,
+    random_state: int = None
+) -> Tuple[Union[dask_cudf.Series, dask_cudf.DataFrame], dask_cudf.Series, int]:
     """
     compute random walks under the node2vec sampling framework for each nodes in
     'start_vertices' and returns a padded result along with the maximum path length.
@@ -82,9 +98,10 @@ def node2vec_random_walks(
         the random walks. In case of multi-column vertices it should be
         a cudf.DataFrame. Only supports int32 currently.
 
-    max_depth: int, optional (default=1)
+    max_depth: int
         The maximum depth of the random walks. If not specified, the maximum
         depth is set to 1.
+        Must be a positive integer
 
     p: float, optional (default=1.0, [0 < p])
         Return factor, which represents the likelihood of backtracking to
@@ -158,7 +175,7 @@ def node2vec_random_walks(
             Comms.get_session_id(),
             input_graph._plc_graph[w],
             start_v[0] if start_v else cudf.Series(dtype=start_vertices_type),
-            max_depth,
+            max_depth if isinstance(max_depth, int) else 1,
             p=p,
             q=q,
             random_state=random_state,
diff --git a/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py b/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
index ee5152bc8a..429ec00ae0 100644
--- a/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
@@ -15,6 +15,7 @@
 from dask.distributed import wait, default_client
 import dask_cudf
 import cudf
+import cupy as cp
 import operator as op
 from cugraph.dask.common.part_utils import (
     persist_dask_df_equal_parts_per_worker,
@@ -27,9 +28,14 @@
 )
 
 from cugraph.dask.comms import comms as Comms
+from typing import Tuple, Union
 
 
-def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
+def convert_to_cudf(
+    cp_paths: cp.ndarray,
+    number_map=None,
+    is_vertex_paths: bool = False
+    ) -> cudf.Series:
     """
     Creates cudf Series from cupy arrays from pylibcugraph wrapper
     """
@@ -48,7 +54,13 @@ def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False):
     return cudf.Series(cp_paths)
 
 
-def _call_plc_uniform_random_walks(sID, mg_graph_x, st_x, max_depth, random_state):
+def _call_plc_uniform_random_walks(
+    sID: bytes,
+    mg_graph_x,
+    st_x: cudf.Series,
+    max_depth: int,
+    random_state: int
+    ) -> Tuple[cp.ndarray, cp.ndarray]:
 
     return pylibcugraph_uniform_random_walks(
         resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
@@ -60,8 +72,12 @@ def _call_plc_uniform_random_walks(sID, mg_graph_x, st_x, max_depth, random_stat
 
 
 def uniform_random_walks(
-    input_graph, start_vertices=None, max_depth=None, random_state=None
-):
+    input_graph,
+    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series
+    ] = None,
+    max_depth: int = 1,
+    random_state: int = None
+) -> Tuple[Union[dask_cudf.Series, dask_cudf.DataFrame], dask_cudf.Series, int]:
     """
     compute random walks under the uniform sampling framework for each nodes in
     'start_vertices' and returns a padded result along with the maximum path length.

From f0e3b0faf05e03743853ca839b0178545a591edf Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Sun, 12 Jan 2025 17:44:43 -0800
Subject: [PATCH 39/60] fix style

---
 .../dask/sampling/biased_random_walks.py      | 19 ++++++-------------
 .../dask/sampling/node2vec_random_walks.py    | 15 ++++++---------
 .../dask/sampling/uniform_random_walks.py     | 19 ++++++-------------
 3 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/python/cugraph/cugraph/dask/sampling/biased_random_walks.py b/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
index 9a70ab658c..277dce6894 100644
--- a/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/biased_random_walks.py
@@ -32,10 +32,8 @@
 
 
 def convert_to_cudf(
-    cp_paths: cp.ndarray,
-    number_map=None,
-    is_vertex_paths: bool = False
-    ) -> cudf.Series:
+    cp_paths: cp.ndarray, number_map=None, is_vertex_paths: bool = False
+) -> cudf.Series:
     """
     Creates cudf Series from cupy arrays from pylibcugraph wrapper
     """
@@ -55,12 +53,8 @@ def convert_to_cudf(
 
 
 def _call_plc_biased_random_walks(
-    sID: bytes,
-    mg_graph_x,
-    st_x: cudf.Series,
-    max_depth: int,
-    random_state: int
-    ) -> Tuple[cp.ndarray, cp.ndarray]:
+    sID: bytes, mg_graph_x, st_x: cudf.Series, max_depth: int, random_state: int
+) -> Tuple[cp.ndarray, cp.ndarray]:
 
     return pylibcugraph_biased_random_walks(
         resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
@@ -73,10 +67,9 @@ def _call_plc_biased_random_walks(
 
 def biased_random_walks(
     input_graph,
-    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series
-    ] = None,
+    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series] = None,
     max_depth: int = 1,
-    random_state: int = None
+    random_state: int = None,
 ) -> Tuple[Union[dask_cudf.Series, dask_cudf.DataFrame], dask_cudf.Series, int]:
     """
     compute random walks under the biased sampling framework for each nodes in
diff --git a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
index d3e84c4c42..96582cdd7d 100644
--- a/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/node2vec_random_walks.py
@@ -31,10 +31,8 @@
 
 
 def convert_to_cudf(
-    cp_paths: cp.ndarray,
-    number_map=None,
-    is_vertex_paths: bool = False
-    ) -> cudf.Series:
+    cp_paths: cp.ndarray, number_map=None, is_vertex_paths: bool = False
+) -> cudf.Series:
     """
     Creates cudf Series from cupy arrays from pylibcugraph wrapper
     """
@@ -58,9 +56,9 @@ def _call_plc_node2vec_random_walks(
     mg_graph_x,
     st_x: cudf.Series,
     max_depth: int,
-    p: float, 
+    p: float,
     q: float,
-    random_state: int
+    random_state: int,
 ) -> Tuple[cp.ndarray, cp.ndarray]:
 
     return pylibcugraph_node2vec_random_walks(
@@ -76,12 +74,11 @@ def _call_plc_node2vec_random_walks(
 
 def node2vec_random_walks(
     input_graph,
-    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series
-    ] = None,
+    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series] = None,
     max_depth: int = 1,
     p: float = 1.0,
     q: float = 1.0,
-    random_state: int = None
+    random_state: int = None,
 ) -> Tuple[Union[dask_cudf.Series, dask_cudf.DataFrame], dask_cudf.Series, int]:
     """
     compute random walks under the node2vec sampling framework for each nodes in
diff --git a/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py b/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
index 429ec00ae0..dd2a069ff8 100644
--- a/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/dask/sampling/uniform_random_walks.py
@@ -32,10 +32,8 @@
 
 
 def convert_to_cudf(
-    cp_paths: cp.ndarray,
-    number_map=None,
-    is_vertex_paths: bool = False
-    ) -> cudf.Series:
+    cp_paths: cp.ndarray, number_map=None, is_vertex_paths: bool = False
+) -> cudf.Series:
     """
     Creates cudf Series from cupy arrays from pylibcugraph wrapper
     """
@@ -55,12 +53,8 @@ def convert_to_cudf(
 
 
 def _call_plc_uniform_random_walks(
-    sID: bytes,
-    mg_graph_x,
-    st_x: cudf.Series,
-    max_depth: int,
-    random_state: int
-    ) -> Tuple[cp.ndarray, cp.ndarray]:
+    sID: bytes, mg_graph_x, st_x: cudf.Series, max_depth: int, random_state: int
+) -> Tuple[cp.ndarray, cp.ndarray]:
 
     return pylibcugraph_uniform_random_walks(
         resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
@@ -73,10 +67,9 @@ def _call_plc_uniform_random_walks(
 
 def uniform_random_walks(
     input_graph,
-    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series
-    ] = None,
+    start_vertices: Union[int, list, cudf.Series, cudf.DataFrame, cudf.Series] = None,
     max_depth: int = 1,
-    random_state: int = None
+    random_state: int = None,
 ) -> Tuple[Union[dask_cudf.Series, dask_cudf.DataFrame], dask_cudf.Series, int]:
     """
     compute random walks under the uniform sampling framework for each nodes in

From 74648d45dcc417dfdd685b139447f82a700dc64c Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Sun, 12 Jan 2025 17:48:55 -0800
Subject: [PATCH 40/60] deprecate old test suite

---
 python/cugraph/cugraph/tests/sampling/test_random_walks.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_random_walks.py
index 76ceb47851..efb58e3ba1 100644
--- a/python/cugraph/cugraph/tests/sampling/test_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.:
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.:
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -40,6 +40,8 @@ def setup_function():
     gc.collect()
 
 
+# FIXME: This test suite must be removed once random_walks is removed from
+# the python API in favor of uniform random walks
 def calc_random_walks(G, max_depth=None, use_padding=False, legacy_result_type=True):
     """
     compute random walks for each nodes in 'start_vertices'

From 70b8d8bcfdefce450b58dfaac700330faefb8c07 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Sun, 12 Jan 2025 18:09:18 -0800
Subject: [PATCH 41/60] add sg tests for uniform random walks

---
 .../sampling/test_uniform_random_walks.py     | 257 ++++++++++++++++++
 1 file changed, 257 insertions(+)
 create mode 100644 python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py

diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
new file mode 100644
index 0000000000..f537761574
--- /dev/null
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
@@ -0,0 +1,257 @@
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.:
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import random
+
+import pytest
+import networkx as nx
+
+import cudf
+import cugraph
+from cudf.testing import assert_series_equal
+from cugraph.utilities import ensure_cugraph_obj_for_nx
+from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS
+
+
+# =============================================================================
+# Parameters
+# =============================================================================
+DIRECTED_GRAPH_OPTIONS = [False, True]
+WEIGHTED_GRAPH_OPTIONS = [False, True]
+DATASETS = [pytest.param(d) for d in DEFAULT_DATASETS]
+SMALL_DATASETS = [pytest.param(d) for d in SMALL_DATASETS]
+
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
+def calc_uniform_random_walks(G, max_depth=None):
+    """
+    compute random walks for each nodes in 'start_vertices'
+
+    parameters
+    ----------
+    G : cuGraph.Graph or networkx.Graph
+        The graph can be either directed or undirected.
+        Weights in the graph are ignored.
+        Use weight parameter if weights need to be considered
+        (currently not supported)
+
+    max_depth : int
+        The maximum depth of the random walks
+
+    Returns
+    -------
+    vertex_paths : cudf.Series or cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    sizes: int
+        The path size in case of coalesced paths.
+    """
+    assert G is not None
+
+    G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt")
+
+    k = random.randint(1, 6)
+
+    random_walks_type = "uniform"
+
+    start_vertices = G.select_random_vertices(num_vertices=k)
+
+    print("\nstart_vertices is \n", start_vertices)
+    vertex_paths, edge_weights, vertex_path_sizes = cugraph.uniform_random_walks(
+        G, start_vertices, max_depth
+    )
+
+    return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices
+
+
+
+
+def check_random_walks(G, path_data, seeds, max_depth):
+    invalid_edge = 0
+    invalid_seeds = 0
+    invalid_edge_wgt = 0
+    v_paths = path_data[0]
+    e_wgt_paths = path_data[1]
+    e_wgt_idx = 0
+
+    G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt")
+    df_G = G.input_df
+
+    if "weight" in df_G.columns:
+        df_G = df_G.rename(columns={"weight": "wgt"})
+
+    total_depth = (max_depth) * len(seeds)
+
+    for i in range(total_depth - 1):
+        vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1]
+
+        # Every max_depth'th vertex in 'v_paths' is a seed
+        # instead of 'seeds[i // (max_depth)]', could have just pop the first element
+        # of the seeds array once there is a match and compare it to 'vertex_1'
+        if i % (max_depth) == 0 and vertex_1 != seeds[i // (max_depth)]:
+            invalid_seeds += 1
+            print(
+                "[ERR] Invalid seed: "
+                " src {} != src {}".format(vertex_1, seeds[i // (max_depth)])
+            )
+
+        if (i % (max_depth)) != (max_depth - 1):
+            # These are the edges
+            src = vertex_1
+            dst = vertex_2
+
+            if src != -1 and dst != -1:
+                # check for valid edge.
+                edge = df_G.loc[
+                    (df_G["src"] == (src)) & (df_G["dst"] == (dst))
+                ].reset_index(drop=True)
+
+                if len(edge) == 0:
+                    print(
+                        "[ERR] Invalid edge: "
+                        "There is no edge src {} dst {}".format(src, dst)
+                    )
+                    invalid_edge += 1
+
+                else:
+                    # check valid edge wgt
+                    if G.is_weighted():
+                        expected_wgt = edge["wgt"].iloc[0]
+                        result_wgt = e_wgt_paths.iloc[e_wgt_idx]
+
+                        if expected_wgt != result_wgt:
+                            print(
+                                "[ERR] Invalid edge wgt: "
+                                "The edge src {} dst {} has wgt {} but got {}".format(
+                                    src, dst, expected_wgt, result_wgt
+                                )
+                            )
+                            invalid_edge_wgt += 1
+            e_wgt_idx += 1
+
+            if src != -1 and dst == -1:
+                # ensure there is no outgoing edges from 'src'
+                assert G.out_degree([src])["degree"].iloc[0] == 0
+
+    assert invalid_seeds == 0
+    assert invalid_edge == 0
+    assert len(v_paths) == (max_depth) * len(seeds)
+    if G.is_weighted():
+        assert invalid_edge_wgt == 0
+        assert len(e_wgt_paths) == (max_depth - 1) * len(seeds)
+
+    
+    max_path_lenth = path_data[2]
+    assert max_path_lenth == max_depth - 1
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("max_depth", [None])
+def test_uniform_random_walks_invalid_max_dept(graph_file, directed, max_depth):
+
+    input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
+    with pytest.raises(TypeError):
+        _, _, _ = calc_uniform_random_walks(input_graph, max_depth=max_depth)
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_uniform_random_walks(graph_file, directed):
+    max_depth = random.randint(2, 10)
+    print("max_depth is ", max_depth)
+    input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
+
+    path_data, seeds = calc_uniform_random_walks(
+        input_graph, max_depth=max_depth
+    )
+
+    check_random_walks(input_graph, path_data, seeds, max_depth)
+
+    path_data, seeds = calc_uniform_random_walks(
+        input_graph, max_depth=max_depth
+    )
+
+    check_random_walks(input_graph, path_data, seeds, max_depth)
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+def test_uniform_random_walks_nx(graph_file):
+    G = graph_file.get_graph(create_using=cugraph.Graph(directed=True))
+
+    M = G.to_pandas_edgelist()
+
+    source = G.source_columns
+    target = G.destination_columns
+    edge_attr = G.weight_column
+
+    Gnx = nx.from_pandas_edgelist(
+        M,
+        source=source,
+        target=target,
+        edge_attr=edge_attr,
+        create_using=nx.DiGraph(),
+    )
+    max_depth = random.randint(2, 10)
+    path_data, seeds = calc_uniform_random_walks(Gnx, max_depth=max_depth)
+
+    check_random_walks(Gnx, path_data, seeds, max_depth)
+
+
+"""@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
+@pytest.mark.sg
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_random_walks(
+    graph_file,
+    directed
+):
+    max_depth = random.randint(2, 10)
+    df_G = utils.read_csv_file(graph_file)
+    df_G.rename(
+        columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True)
+    df_G['src_0'] = df_G['src'] + 1000
+    df_G['dst_0'] = df_G['dst'] + 1000
+
+    if directed:
+        G = cugraph.Graph(directed=True)
+    else:
+        G = cugraph.Graph()
+    G.from_cudf_edgelist(df_G, source=['src', 'src_0'],
+                         destination=['dst', 'dst_0'],
+                         edge_attr="weight")
+
+    k = random.randint(1, 10)
+    start_vertices = random.sample(G.nodes().to_numpy().tolist(), k)
+
+    seeds = cudf.DataFrame()
+    seeds['v'] = start_vertices
+    seeds['v_0'] = seeds['v'] + 1000
+
+    df, offsets = cugraph.random_walks(G, seeds, max_depth)
+
+    check_random_walks(df, offsets, seeds, df_G)
+"""

From 9b270c70411baa92c0f9755a8d98dad33bfe8d67 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Sun, 12 Jan 2025 18:10:01 -0800
Subject: [PATCH 42/60] update copyright

---
 .../cugraph/cugraph/tests/sampling/test_uniform_random_walks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
index f537761574..78579aa40c 100644
--- a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.:
+# Copyright (c) 2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at

From 9e850d24eec4ce3640500731b90c7bd6c9ecc79c Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Mon, 13 Jan 2025 20:16:51 -0800
Subject: [PATCH 43/60] update tests

---
 .../sampling/test_uniform_random_walks.py     | 53 ++++++++++---------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
index 78579aa40c..9d5fef1695 100644
--- a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
@@ -21,7 +21,7 @@
 import cugraph
 from cudf.testing import assert_series_equal
 from cugraph.utilities import ensure_cugraph_obj_for_nx
-from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS
+from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils
 
 
 # =============================================================================
@@ -87,7 +87,7 @@ def calc_uniform_random_walks(G, max_depth=None):
 
 
 
-def check_random_walks(G, path_data, seeds, max_depth):
+def check_uniform_random_walks(G, path_data, seeds, max_depth):
     invalid_edge = 0
     invalid_seeds = 0
     invalid_edge_wgt = 0
@@ -103,20 +103,20 @@ def check_random_walks(G, path_data, seeds, max_depth):
 
     total_depth = (max_depth) * len(seeds)
 
-    for i in range(total_depth - 1):
+    for i in range(total_depth):
         vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1]
 
-        # Every max_depth'th vertex in 'v_paths' is a seed
-        # instead of 'seeds[i // (max_depth)]', could have just pop the first element
+        # Every max_depth'th vertex in 'v_paths' is a seed instead of
+        # 'seeds[i // (max_depth + 1)]', could have just pop the first element
         # of the seeds array once there is a match and compare it to 'vertex_1'
-        if i % (max_depth) == 0 and vertex_1 != seeds[i // (max_depth)]:
+        if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
             invalid_seeds += 1
             print(
                 "[ERR] Invalid seed: "
-                " src {} != src {}".format(vertex_1, seeds[i // (max_depth)])
+                " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
             )
 
-        if (i % (max_depth)) != (max_depth - 1):
+        if (i % (max_depth + 1)) != (max_depth):
             # These are the edges
             src = vertex_1
             dst = vertex_2
@@ -156,14 +156,14 @@ def check_random_walks(G, path_data, seeds, max_depth):
 
     assert invalid_seeds == 0
     assert invalid_edge == 0
-    assert len(v_paths) == (max_depth) * len(seeds)
+    assert len(v_paths) == (max_depth + 1) * len(seeds)
     if G.is_weighted():
         assert invalid_edge_wgt == 0
-        assert len(e_wgt_paths) == (max_depth - 1) * len(seeds)
+        assert len(e_wgt_paths) == (max_depth) * len(seeds)
 
     
     max_path_lenth = path_data[2]
-    assert max_path_lenth == max_depth - 1
+    assert max_path_lenth == max_depth
 
 
 @pytest.mark.sg
@@ -189,13 +189,10 @@ def test_uniform_random_walks(graph_file, directed):
         input_graph, max_depth=max_depth
     )
 
-    check_random_walks(input_graph, path_data, seeds, max_depth)
+    print("path_data = \n", path_data)
+    print("seeds = \n", seeds)
 
-    path_data, seeds = calc_uniform_random_walks(
-        input_graph, max_depth=max_depth
-    )
-
-    check_random_walks(input_graph, path_data, seeds, max_depth)
+    check_uniform_random_walks(input_graph, path_data, seeds, max_depth)
 
 
 @pytest.mark.sg
@@ -219,20 +216,21 @@ def test_uniform_random_walks_nx(graph_file):
     max_depth = random.randint(2, 10)
     path_data, seeds = calc_uniform_random_walks(Gnx, max_depth=max_depth)
 
-    check_random_walks(Gnx, path_data, seeds, max_depth)
+    check_uniform_random_walks(Gnx, path_data, seeds, max_depth)
 
 
-"""@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
+#"""@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
 @pytest.mark.sg
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("graph_file", [SMALL_DATASETS[0]])
+@pytest.mark.parametrize("directed", [DIRECTED_GRAPH_OPTIONS[0]])
 def test_random_walks(
     graph_file,
     directed
 ):
     max_depth = random.randint(2, 10)
-    df_G = utils.read_csv_file(graph_file)
+    df_G = graph_file.get_edgelist()
     df_G.rename(
-        columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True)
+        columns={"wgt": "weight"}, inplace=True)
     df_G['src_0'] = df_G['src'] + 1000
     df_G['dst_0'] = df_G['dst'] + 1000
 
@@ -245,13 +243,16 @@ def test_random_walks(
                          edge_attr="weight")
 
     k = random.randint(1, 10)
-    start_vertices = random.sample(G.nodes().to_numpy().tolist(), k)
+    #start_vertices = random.sample(G.nodes().to_numpy().tolist(), k)
+
+    start_vertices = G.select_random_vertices(num_vertices=k)
 
     seeds = cudf.DataFrame()
     seeds['v'] = start_vertices
+    print("seeds = \n", seeds)
     seeds['v_0'] = seeds['v'] + 1000
 
-    df, offsets = cugraph.random_walks(G, seeds, max_depth)
+    df, offsets = cugraph.uniform-random_walks(G, seeds, max_depth)
 
-    check_random_walks(df, offsets, seeds, df_G)
-"""
+    check_uniform_random_walks(df, offsets, seeds, df_G)
+#"""

From 10c471afbe971ffab7fccc7bcdd46f0cec2f96dd Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 14 Jan 2025 09:20:31 -0800
Subject: [PATCH 44/60] add support of multi column seeds

---
 python/cugraph/cugraph/sampling/uniform_random_walks.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/cugraph/cugraph/sampling/uniform_random_walks.py b/python/cugraph/cugraph/sampling/uniform_random_walks.py
index 99d6695824..d0e71f401d 100644
--- a/python/cugraph/cugraph/sampling/uniform_random_walks.py
+++ b/python/cugraph/cugraph/sampling/uniform_random_walks.py
@@ -112,7 +112,10 @@ def uniform_random_walks(
         df_ = cudf.DataFrame()
         df_["vertex_paths"] = vertex_paths
         df_ = G.unrenumber(df_, "vertex_paths", preserve_order=True)
-        vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
+        if len(df_.columns) > 1:
+            vertex_paths = df_.fillna(-1)
+        else:
+            vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
 
     edge_wgt_paths = cudf.Series(edge_wgt_paths)
 

From b94bad22d33b48137bd13bd22a0176d1a0911a99 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 14 Jan 2025 09:21:57 -0800
Subject: [PATCH 45/60] add support of multi column seeds for
 'select_random_vertices'

---
 .../cugraph/structure/graph_implementation/simpleGraph.py    | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py
index 4523b7f13b..d9beba70e4 100644
--- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py
+++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py
@@ -886,7 +886,10 @@ def select_random_vertices(
             df_ = cudf.DataFrame()
             df_["vertex"] = vertices
             df_ = self.renumber_map.unrenumber(df_, "vertex")
-            vertices = df_["vertex"]
+            if len(df_.columns) > 1:
+                vertices = df_
+            else:
+                vertices = df_["vertex"]
 
         return vertices
 

From af68c3f22357e5ec97b7a9df137c10c0a4008576 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 14 Jan 2025 09:22:47 -0800
Subject: [PATCH 46/60] add multi column tests

---
 .../sampling/test_uniform_random_walks.py     | 82 +++++++++++--------
 1 file changed, 49 insertions(+), 33 deletions(-)

diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
index 9d5fef1695..7fca738575 100644
--- a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
@@ -20,6 +20,7 @@
 import cudf
 import cugraph
 from cudf.testing import assert_series_equal
+from cudf.testing.testing import assert_frame_equal
 from cugraph.utilities import ensure_cugraph_obj_for_nx
 from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils
 
@@ -104,25 +105,48 @@ def check_uniform_random_walks(G, path_data, seeds, max_depth):
     total_depth = (max_depth) * len(seeds)
 
     for i in range(total_depth):
-        vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1]
+        if isinstance(seeds, cudf.DataFrame):
+            vertex_1 = v_paths.iloc[[i]].reset_index(drop=True)
+            vertex_2 = v_paths.iloc[[i + 1]].reset_index(drop=True)
+        else:
+            vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1]
 
         # Every max_depth'th vertex in 'v_paths' is a seed instead of
         # 'seeds[i // (max_depth + 1)]', could have just pop the first element
         # of the seeds array once there is a match and compare it to 'vertex_1'
-        if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
-            invalid_seeds += 1
-            print(
-                "[ERR] Invalid seed: "
-                " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
-            )
+
+        if i % (max_depth + 1) == 0:
+            if isinstance(seeds, cudf.DataFrame):
+                assert_frame_equal(
+                    vertex_1.rename(columns={x:y for x,y in zip(vertex_1.columns,range(0,len(vertex_1.columns)))}),
+                    seeds.iloc[[i // (max_depth + 1)]].reset_index(drop=True).rename(columns={x:y for x,y in zip(seeds.columns,range(0,len(seeds.columns)))}),
+                    check_dtype=False, check_like=True)
+            else:
+                if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
+                    invalid_seeds += 1
+                    print(
+                        "[ERR] Invalid seed: "
+                        " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
+                )
 
         if (i % (max_depth + 1)) != (max_depth):
             # These are the edges
             src = vertex_1
             dst = vertex_2
-
-            if src != -1 and dst != -1:
-                # check for valid edge.
+            
+            # check for valid edge.
+            if isinstance(seeds, cudf.DataFrame):
+                if (-1 not in src.iloc[0].reset_index(drop=True)) and (-1 not in dst.iloc[0].reset_index(drop=True)):
+                    edge = cudf.DataFrame()
+                    edge["src"] = vertex_1["0_vertex_paths"]
+                    edge["src_0"] = vertex_1["1_vertex_paths"]
+                    edge["dst"] = vertex_2["0_vertex_paths"]
+                    edge["dst_0"] = vertex_2["1_vertex_paths"]
+
+                    join1 = cudf.merge(df_G, edge, on=[*edge.columns])
+                    
+                    assert len(cudf.merge(df_G, edge, on=[*edge.columns])) > 0
+            else:
                 edge = df_G.loc[
                     (df_G["src"] == (src)) & (df_G["dst"] == (dst))
                 ].reset_index(drop=True)
@@ -148,11 +172,11 @@ def check_uniform_random_walks(G, path_data, seeds, max_depth):
                                 )
                             )
                             invalid_edge_wgt += 1
-            e_wgt_idx += 1
+                e_wgt_idx += 1
 
-            if src != -1 and dst == -1:
-                # ensure there is no outgoing edges from 'src'
-                assert G.out_degree([src])["degree"].iloc[0] == 0
+                if src != -1 and dst == -1:
+                    # ensure there is no outgoing edges from 'src'
+                    assert G.out_degree([src])["degree"].iloc[0] == 0
 
     assert invalid_seeds == 0
     assert invalid_edge == 0
@@ -189,9 +213,6 @@ def test_uniform_random_walks(graph_file, directed):
         input_graph, max_depth=max_depth
     )
 
-    print("path_data = \n", path_data)
-    print("seeds = \n", seeds)
-
     check_uniform_random_walks(input_graph, path_data, seeds, max_depth)
 
 
@@ -219,11 +240,10 @@ def test_uniform_random_walks_nx(graph_file):
     check_uniform_random_walks(Gnx, path_data, seeds, max_depth)
 
 
-#"""@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
 @pytest.mark.sg
-@pytest.mark.parametrize("graph_file", [SMALL_DATASETS[0]])
-@pytest.mark.parametrize("directed", [DIRECTED_GRAPH_OPTIONS[0]])
-def test_random_walks(
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_random_walks_multi_column_seeds(
     graph_file,
     directed
 ):
@@ -243,16 +263,12 @@ def test_random_walks(
                          edge_attr="weight")
 
     k = random.randint(1, 10)
-    #start_vertices = random.sample(G.nodes().to_numpy().tolist(), k)
 
-    start_vertices = G.select_random_vertices(num_vertices=k)
-
-    seeds = cudf.DataFrame()
-    seeds['v'] = start_vertices
-    print("seeds = \n", seeds)
-    seeds['v_0'] = seeds['v'] + 1000
-
-    df, offsets = cugraph.uniform-random_walks(G, seeds, max_depth)
-
-    check_uniform_random_walks(df, offsets, seeds, df_G)
-#"""
+    seeds = G.select_random_vertices(num_vertices=k)
+    vertex_paths, edge_weights, vertex_path_sizes = cugraph.uniform_random_walks(
+        G, seeds, max_depth)
+    
+    path_data = (vertex_paths, edge_weights, vertex_path_sizes)
+    
+    check_uniform_random_walks(G, path_data, seeds, max_depth)
+    
\ No newline at end of file

From 0c8f85edda2489846e4cd6b25312ccc132082b4d Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 14 Jan 2025 09:36:23 -0800
Subject: [PATCH 47/60] add support of multi column seeds

---
 .../cugraph/sampling/biased_random_walks.py    |  5 ++++-
 .../cugraph/sampling/node2vec_random_walks.py  | 18 +++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/python/cugraph/cugraph/sampling/biased_random_walks.py b/python/cugraph/cugraph/sampling/biased_random_walks.py
index 41340cba8a..ca35d88a3d 100644
--- a/python/cugraph/cugraph/sampling/biased_random_walks.py
+++ b/python/cugraph/cugraph/sampling/biased_random_walks.py
@@ -112,7 +112,10 @@ def biased_random_walks(
         df_ = cudf.DataFrame()
         df_["vertex_paths"] = vertex_paths
         df_ = G.unrenumber(df_, "vertex_paths", preserve_order=True)
-        vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
+        if len(df_.columns) > 1:
+            vertex_paths = df_.fillna(-1)
+        else:
+            vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
 
     edge_wgt_paths = cudf.Series(edge_wgt_paths)
 
diff --git a/python/cugraph/cugraph/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
index 1f93f3c61d..68e9f3f072 100644
--- a/python/cugraph/cugraph/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
@@ -141,7 +141,7 @@ def node2vec_random_walks(
 
     start_vertices = ensure_valid_dtype(G, start_vertices)
 
-    vertex_set, edge_set = pylibcugraph_node2vec_random_walks(
+    vertex_paths, edge_wgt_paths = pylibcugraph_node2vec_random_walks(
         resource_handle=ResourceHandle(),
         graph=G._plc_graph,
         seed_array=start_vertices,
@@ -150,12 +150,16 @@ def node2vec_random_walks(
         q=q,
         random_state=random_state,
     )
-    vertex_set = cudf.Series(vertex_set)
-    edge_set = cudf.Series(edge_set)
+    vertex_paths = cudf.Series(vertex_paths)
+    edge_wgt_paths = cudf.Series(edge_wgt_paths)
 
     if G.renumbered:
         df_ = cudf.DataFrame()
-        df_["vertex_set"] = vertex_set
-        df_ = G.unrenumber(df_, "vertex_set", preserve_order=True)
-        vertex_set = cudf.Series(df_["vertex_set"])
-    return vertex_set, edge_set, max_depth
+        df_["vertex_paths"] = vertex_paths
+        df_ = G.unrenumber(df_, "vertex_paths", preserve_order=True)
+        if len(df_.columns) > 1:
+            vertex_paths = df_.fillna(-1)
+        else:
+            vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
+    
+    return vertex_paths, edge_wgt_paths, max_depth

From 37d3a47bab12b6f0067c04f75fc767db7b224f13 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Tue, 14 Jan 2025 09:40:18 -0800
Subject: [PATCH 48/60] add test for biased random walks

---
 .../sampling/test_biased_random_walks.py      | 250 ++++++++++++++++++
 1 file changed, 250 insertions(+)
 create mode 100644 python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py

diff --git a/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py
new file mode 100644
index 0000000000..b6097aa1ed
--- /dev/null
+++ b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py
@@ -0,0 +1,250 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import random
+
+import pytest
+import networkx as nx
+
+import cudf
+import cugraph
+from cudf.testing import assert_series_equal
+from cudf.testing.testing import assert_frame_equal
+from cugraph.utilities import ensure_cugraph_obj_for_nx
+from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils
+
+
+# =============================================================================
+# Parameters
+# =============================================================================
+DIRECTED_GRAPH_OPTIONS = [False, True]
+WEIGHTED_GRAPH_OPTIONS = [False, True]
+DATASETS = [pytest.param(d) for d in DEFAULT_DATASETS]
+SMALL_DATASETS = [pytest.param(d) for d in SMALL_DATASETS]
+
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
+def calc_biased_random_walks(G, max_depth=None):
+    """
+    Compute biased random walks from randomly selected start vertices.
+
+    parameters
+    ----------
+    G : cuGraph.Graph or networkx.Graph
+        The graph can be either directed or undirected.
+        Weights in the graph are ignored.
+        Use weight parameter if weights need to be considered
+        (currently not supported)
+
+    max_depth : int
+        The maximum depth of the random walks
+
+    Returns
+    -------
+    vertex_paths : cudf.Series or cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    sizes: int
+        The path size in case of coalesced paths.
+    """
+    assert G is not None
+
+    G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt")
+
+    k = random.randint(1, 6)
+
+    random_walks_type = "biased"
+
+    start_vertices = G.select_random_vertices(num_vertices=k)
+
+    print("\nstart_vertices is \n", start_vertices)
+    vertex_paths, edge_weights, vertex_path_sizes = cugraph.biased_random_walks(
+        G, start_vertices, max_depth
+    )
+
+    return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices
+
+
+
+
+def check_biased_random_walks(G, path_data, seeds, max_depth):
+    invalid_edge = 0
+    invalid_seeds = 0
+    invalid_edge_wgt = 0
+    v_paths = path_data[0]
+    e_wgt_paths = path_data[1]
+    e_wgt_idx = 0
+
+    G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt")
+    df_G = G.input_df
+
+    if "weight" in df_G.columns:
+        df_G = df_G.rename(columns={"weight": "wgt"})
+
+    total_depth = (max_depth) * len(seeds)
+
+    for i in range(total_depth):
+        if isinstance(seeds, cudf.DataFrame):
+            vertex_1 = v_paths.iloc[[i]].reset_index(drop=True)
+            vertex_2 = v_paths.iloc[[i + 1]].reset_index(drop=True)
+        else:
+            vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1]
+
+        # Every (max_depth + 1)'th vertex in 'v_paths' is a seed. Instead of
+        # indexing 'seeds[i // (max_depth + 1)]', we could pop the first element
+        # of the seeds array once there is a match and compare it to 'vertex_1'
+
+        if i % (max_depth + 1) == 0:
+            if isinstance(seeds, cudf.DataFrame):
+                assert_frame_equal(
+                    vertex_1.rename(columns={x:y for x,y in zip(vertex_1.columns,range(0,len(vertex_1.columns)))}),
+                    seeds.iloc[[i // (max_depth + 1)]].reset_index(drop=True).rename(columns={x:y for x,y in zip(seeds.columns,range(0,len(seeds.columns)))}),
+                    check_dtype=False, check_like=True)
+            else:
+                if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
+                    invalid_seeds += 1
+                    print(
+                        "[ERR] Invalid seed: "
+                        " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
+                )
+
+        if (i % (max_depth + 1)) != (max_depth):
+            # These are the edges
+            src = vertex_1
+            dst = vertex_2
+            
+            # check for valid edge.
+            if isinstance(seeds, cudf.DataFrame):
+                if (-1 not in src.iloc[0].reset_index(drop=True)) and (-1 not in dst.iloc[0].reset_index(drop=True)):
+                    edge = cudf.DataFrame()
+                    edge["src"] = vertex_1["0_vertex_paths"]
+                    edge["src_0"] = vertex_1["1_vertex_paths"]
+                    edge["dst"] = vertex_2["0_vertex_paths"]
+                    edge["dst_0"] = vertex_2["1_vertex_paths"]
+
+                    join1 = cudf.merge(df_G, edge, on=[*edge.columns])
+                    
+                    assert len(cudf.merge(df_G, edge, on=[*edge.columns])) > 0
+            else:
+                edge = df_G.loc[
+                    (df_G["src"] == (src)) & (df_G["dst"] == (dst))
+                ].reset_index(drop=True)
+
+                if len(edge) == 0:
+                    print(
+                        "[ERR] Invalid edge: "
+                        "There is no edge src {} dst {}".format(src, dst)
+                    )
+                    invalid_edge += 1
+
+                else:
+                    # check valid edge wgt
+                    if G.is_weighted():
+                        expected_wgt = edge["wgt"].iloc[0]
+                        result_wgt = e_wgt_paths.iloc[e_wgt_idx]
+
+                        if expected_wgt != result_wgt:
+                            print(
+                                "[ERR] Invalid edge wgt: "
+                                "The edge src {} dst {} has wgt {} but got {}".format(
+                                    src, dst, expected_wgt, result_wgt
+                                )
+                            )
+                            invalid_edge_wgt += 1
+                e_wgt_idx += 1
+
+                if src != -1 and dst == -1:
+                    # ensure there is no outgoing edges from 'src'
+                    assert G.out_degree([src])["degree"].iloc[0] == 0
+
+    assert invalid_seeds == 0
+    assert invalid_edge == 0
+    assert len(v_paths) == (max_depth + 1) * len(seeds)
+    if G.is_weighted():
+        assert invalid_edge_wgt == 0
+        assert len(e_wgt_paths) == (max_depth) * len(seeds)
+
+    
+    max_path_lenth = path_data[2]
+    assert max_path_lenth == max_depth
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("max_depth", [None])
+def test_biased_random_walks_invalid_max_dept(graph_file, directed, max_depth):
+
+    input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
+    with pytest.raises(TypeError):
+        _, _, _ = calc_biased_random_walks(input_graph, max_depth=max_depth)
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_biased_random_walks(graph_file, directed):
+    max_depth = random.randint(2, 10)
+    print("max_depth is ", max_depth)
+    input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
+
+    path_data, seeds = calc_biased_random_walks(
+        input_graph, max_depth=max_depth
+    )
+
+    check_biased_random_walks(input_graph, path_data, seeds, max_depth)
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_random_walks_multi_column_seeds(
+    graph_file,
+    directed
+):
+    max_depth = random.randint(2, 10)
+    df_G = graph_file.get_edgelist()
+    df_G.rename(
+        columns={"wgt": "weight"}, inplace=True)
+    df_G['src_0'] = df_G['src'] + 1000
+    df_G['dst_0'] = df_G['dst'] + 1000
+
+    if directed:
+        G = cugraph.Graph(directed=True)
+    else:
+        G = cugraph.Graph()
+    G.from_cudf_edgelist(df_G, source=['src', 'src_0'],
+                         destination=['dst', 'dst_0'],
+                         edge_attr="weight")
+
+    k = random.randint(1, 10)
+
+    seeds = G.select_random_vertices(num_vertices=k)
+    vertex_paths, edge_weights, vertex_path_sizes = cugraph.biased_random_walks(
+        G, seeds, max_depth)
+    
+    path_data = (vertex_paths, edge_weights, vertex_path_sizes)
+    
+    check_biased_random_walks(G, path_data, seeds, max_depth)
+    
\ No newline at end of file

From e7a5952e4a11582994a79aad4d3e7c74255047f1 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Wed, 15 Jan 2025 07:38:25 -0800
Subject: [PATCH 49/60] add mg ECG

---
 python/cugraph/cugraph/dask/community/ecg.py | 219 +++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 python/cugraph/cugraph/dask/community/ecg.py

diff --git a/python/cugraph/cugraph/dask/community/ecg.py b/python/cugraph/cugraph/dask/community/ecg.py
new file mode 100644
index 0000000000..8556b4da78
--- /dev/null
+++ b/python/cugraph/cugraph/dask/community/ecg.py
@@ -0,0 +1,219 @@
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import annotations
+
+from dask.distributed import wait, default_client
+import cugraph.dask.comms.comms as Comms
+import dask_cudf
+import dask
+from dask import delayed
+import cudf
+
+from pylibcugraph import ResourceHandle
+from pylibcugraph import ecg as pylibcugraph_ecg
+import numpy
+import cupy as cp
+from typing import Tuple, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from cugraph import Graph
+
+
+def convert_to_cudf(result: cp.ndarray) -> Tuple[cudf.DataFrame, float]:
+    """
+    Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper
+    """
+    cupy_vertex, cupy_partition, modularity = result
+    df = cudf.DataFrame()
+    df["vertex"] = cupy_vertex
+    df["partition"] = cupy_partition
+
+    return df, modularity
+
+
+def _call_plc_ecg(
+    sID: bytes,
+    mg_graph_x,
+    max_iter: int,
+    resolution: int,
+    random_state: int,
+    theta: int,
+    do_expensive_check: bool,
+) -> Tuple[cp.ndarray, cp.ndarray, float]:
+    return pylibcugraph_ecg(
+        resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
+        random_state=random_state,
+        graph=mg_graph_x,
+        max_level=max_iter,
+        resolution=resolution,
+        theta=theta,
+        do_expensive_check=do_expensive_check,
+    )
+
+
+def ecg(
+    input_graph,
+    min_weight: float = 0.0001,
+    ensemble_size: int = 100,
+    max_level: int = 10,
+    threshold: float = 1e-7,
+    resolution: float = 1.0,
+    random_state: int = None,
+    weight=None,
+) -> Tuple[dask_cudf.DataFrame, float]:
+    """
+    Compute the Ensemble Clustering for Graphs (ECG) partition of the input
+    graph. ECG runs truncated Louvain on an ensemble of permutations of the
+    input graph, then uses the ensemble partitions to determine weights for
+    the input graph. The final result is found by running full Louvain on
+    the input graph using the determined weights.
+
+    See https://arxiv.org/abs/1809.05578 for further information.
+
+    Parameters
+    ----------
+    input_graph : cugraph.Graph
+        The graph descriptor should contain the connectivity information
+        and weights. The adjacency list will be computed if not already
+        present.
+
+    min_weight : float, optional (default=0.0001)
+        The minimum value to assign as an edge weight in the ECG algorithm.
+        It should be a value in the range [0, 1] and is usually left as the
+        default value of 0.0001.
+
+    ensemble_size : integer, optional (default=100)
+        The number of graph permutations to use for the ensemble.
+        The default value is 100; larger values may produce higher quality
+        partitions for some graphs.
+
+    max_level : integer, optional (default=10)
+        This controls the maximum number of levels/iterations of the ECG
+        algorithm. When specified the algorithm will terminate after no more
+        than the specified number of iterations. No error occurs when the
+        algorithm terminates early in this manner.
+
+    threshold: float
+        Modularity gain threshold for each level. If the gain of
+        modularity between 2 levels of the algorithm is less than the
+        given threshold then the algorithm stops and returns the
+        resulting communities.
+        Defaults to 1e-7.
+
+    resolution: float, optional (default=1.0)
+        Called gamma in the modularity formula, this changes the size
+        of the communities.  Higher resolutions lead to more smaller
+        communities, lower resolutions lead to fewer larger communities.
+        Defaults to 1.
+
+    random_state: int, optional(default=None)
+        Random state to use when generating samples.  Optional argument,
+        defaults to a hash of process id, time, and hostname.
+
+    weight : str, optional (default=None)
+        Deprecated.
+        This parameter is here for NetworkX compatibility and
+        represents which NetworkX data column represents Edge weights.
+
+    Returns
+    -------
+    parts : dask_cudf.DataFrame
+        GPU data frame of size V containing two columns the vertex id and the
+        partition id it is assigned to.
+
+        ddf['vertex'] : cudf.Series
+            Contains the vertex identifiers
+        ddf['partition'] : cudf.Series
+            Contains the partition assigned to the vertices
+
+    modularity_score : float
+        a floating point number containing the global modularity score of the
+        partitioning.
+
+    Examples
+    --------
+    >>> import cugraph.dask as dcg
+    >>> import dask_cudf
+    >>> # ... Init a DASK Cluster
+    >>> #    see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html
+    >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
+    >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
+    >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
+    ...                          blocksize=chunksize, delimiter=" ",
+    ...                          names=["src", "dst", "value"],
+    ...                          dtype=["int32", "int32", "float32"])
+    >>> dg = cugraph.Graph()
+    >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst')
+    >>> parts, modularity_score = dcg.ecg(dg)
+
+    """
+
+    if input_graph.is_directed():
+        raise ValueError("input graph must be undirected")
+
+    # Return a client if one has started
+    client = default_client()
+
+    do_expensive_check = False
+
+    result = [
+        client.submit(
+            _call_plc_ecg,
+            Comms.get_session_id(),
+            input_graph._plc_graph[w],
+            max_iter,
+            resolution,
+            random_state,
+            theta,
+            do_expensive_check,
+            workers=[w],
+            allow_other_workers=False,
+        )
+        for w in Comms.get_workers()
+    ]
+
+    wait(result)
+
+    part_mod_score = [client.submit(convert_to_cudf, r) for r in result]
+    wait(part_mod_score)
+
+    vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
+    empty_df = cudf.DataFrame(
+        {
+            "vertex": numpy.empty(shape=0, dtype=vertex_dtype),
+            "partition": numpy.empty(shape=0, dtype="int32"),
+        }
+    )
+
+    part_mod_score = [delayed(lambda x: x, nout=2)(r) for r in part_mod_score]
+
+    ddf = dask_cudf.from_delayed(
+        [r[0] for r in part_mod_score], meta=empty_df, verify_meta=False
+    ).persist()
+
+    mod_score = dask.array.from_delayed(
+        part_mod_score[0][1], shape=(1,), dtype=float
+    ).compute()
+
+    wait(ddf)
+    wait(mod_score)
+
+    wait([r.release() for r in part_mod_score])
+
+    if input_graph.renumbered:
+        ddf = input_graph.unrenumber(ddf, "vertex")
+
+    return ddf, mod_score

From dfe8dd2193e10e50cababc6a49f84ef2e38160f1 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Wed, 15 Jan 2025 19:34:55 -0800
Subject: [PATCH 50/60] update MG ecg implementation

---
 python/cugraph/cugraph/dask/__init__.py       |  1 +
 .../cugraph/dask/community/__init__.py        |  1 +
 python/cugraph/cugraph/dask/community/ecg.py  | 28 +++++++++----------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/python/cugraph/cugraph/dask/__init__.py b/python/cugraph/cugraph/dask/__init__.py
index 617eb25a2b..69aba2c8aa 100644
--- a/python/cugraph/cugraph/dask/__init__.py
+++ b/python/cugraph/cugraph/dask/__init__.py
@@ -45,6 +45,7 @@
 from .link_prediction.cosine import cosine
 from .link_prediction.cosine import all_pairs_cosine
 from .community.leiden import leiden
+from .community.ecg import ecg
 
 # Avoid "p2p" shuffling in dask for now
 config.set({"dataframe.shuffle.method": "tasks"})
diff --git a/python/cugraph/cugraph/dask/community/__init__.py b/python/cugraph/cugraph/dask/community/__init__.py
index 9b5301d0e4..146e837bd8 100644
--- a/python/cugraph/cugraph/dask/community/__init__.py
+++ b/python/cugraph/cugraph/dask/community/__init__.py
@@ -16,3 +16,4 @@
 from .induced_subgraph import induced_subgraph
 from .leiden import leiden
 from .ktruss_subgraph import ktruss_subgraph
+from .ecg import ecg
diff --git a/python/cugraph/cugraph/dask/community/ecg.py b/python/cugraph/cugraph/dask/community/ecg.py
index 8556b4da78..3ed9947783 100644
--- a/python/cugraph/cugraph/dask/community/ecg.py
+++ b/python/cugraph/cugraph/dask/community/ecg.py
@@ -47,19 +47,23 @@ def convert_to_cudf(result: cp.ndarray) -> Tuple[cudf.DataFrame, float]:
 def _call_plc_ecg(
     sID: bytes,
     mg_graph_x,
-    max_iter: int,
+    min_weight: float,
+    ensemble_size: int,
+    max_level: int,
+    threshold: float,
     resolution: int,
     random_state: int,
-    theta: int,
     do_expensive_check: bool,
 ) -> Tuple[cp.ndarray, cp.ndarray, float]:
     return pylibcugraph_ecg(
         resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
         random_state=random_state,
         graph=mg_graph_x,
-        max_level=max_iter,
+        min_weight=min_weight,
+        ensemble_size=ensemble_size,
+        max_level=max_level,
+        threshold=threshold,
         resolution=resolution,
-        theta=theta,
         do_expensive_check=do_expensive_check,
     )
 
@@ -71,8 +75,7 @@ def ecg(
     max_level: int = 10,
     threshold: float = 1e-7,
     resolution: float = 1.0,
-    random_state: int = None,
-    weight=None,
+    random_state: int = None
 ) -> Tuple[dask_cudf.DataFrame, float]:
     """
     Compute the Ensemble Clustering for Graphs (ECG) partition of the input
@@ -123,10 +126,6 @@ def ecg(
         Random state to use when generating samples.  Optional argument,
         defaults to a hash of process id, time, and hostname.
 
-    weight : str, optional (default=None)
-        Deprecated.
-        This parameter is here for NetworkX compatibility and
-        represents which NetworkX data column represents Edge weights.
 
     Returns
     -------
@@ -161,9 +160,6 @@ def ecg(
 
     """
 
-    if input_graph.is_directed():
-        raise ValueError("input graph must be undirected")
-
     # Return a client if one has started
     client = default_client()
 
@@ -174,10 +170,12 @@ def ecg(
             _call_plc_ecg,
             Comms.get_session_id(),
             input_graph._plc_graph[w],
-            max_iter,
+            min_weight,
+            ensemble_size,
+            max_level,
+            threshold,
             resolution,
             random_state,
-            theta,
             do_expensive_check,
             workers=[w],
             allow_other_workers=False,

From 5015e30e7f79e438d8288b069b265487d0b3f510 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 16 Jan 2025 07:06:18 -0800
Subject: [PATCH 51/60] add mg ecg tests

---
 .../cugraph/tests/community/test_ecg_mg.py    | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 python/cugraph/cugraph/tests/community/test_ecg_mg.py

diff --git a/python/cugraph/cugraph/tests/community/test_ecg_mg.py b/python/cugraph/cugraph/tests/community/test_ecg_mg.py
new file mode 100644
index 0000000000..356c812fea
--- /dev/null
+++ b/python/cugraph/cugraph/tests/community/test_ecg_mg.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+import cugraph
+import cugraph.dask as dcg
+from cugraph.datasets import karate, dolphins, netscience
+
+
+# =============================================================================
+# Parameters
+# =============================================================================
+
+
+DATASETS = [dolphins, karate, netscience]
+
+MIN_WEIGHTS = [0.05, 0.15]
+
+ENSEMBLE_SIZES = [16, 32]
+
+MAX_LEVELS = [10, 20]
+
+RESOLUTIONS = [0.95, 1.0]
+
+THRESHOLDS = [1e-6, 1e-07]
+
+RANDOM_STATES = [0, 42]
+
+
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+
+def get_mg_graph(dataset, directed):
+    """Build a cugraph.Graph from the dataset's dask edge list."""
+    edgelist_ddf = dataset.get_dask_edgelist()
+
+    mg_graph = cugraph.Graph(directed=directed)
+    mg_graph.from_dask_cudf_edgelist(edgelist_ddf, "src", "dst", "wgt")
+
+    return mg_graph
+
+
+def golden_call(filename):
+    """Return the reference modularity score recorded for ``filename``."""
+    return {
+        "dolphins": 0.4962422251701355,
+        "karate": 0.38428664207458496,
+        "netscience": 0.9279554486274719,
+    }[filename]  # KeyError (not a silent None) for an unregistered dataset
+
+
+# =============================================================================
+# Tests
+# =============================================================================
+# FIXME: Implement more robust tests
+
+
+@pytest.mark.mg
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("min_weight", MIN_WEIGHTS)
+@pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES)
+@pytest.mark.parametrize("max_level", MAX_LEVELS)
+@pytest.mark.parametrize("threshold", THRESHOLDS)
+@pytest.mark.parametrize("resolution", RESOLUTIONS)
+@pytest.mark.parametrize("random_state", RANDOM_STATES)
+def test_mg_ecg(
+    dask_client,
+    dataset,
+    min_weight,
+    ensemble_size,
+    max_level,
+    threshold,
+    resolution,
+    random_state,
+):
+    """ECG modularity must exceed 80% of the recorded reference score."""
+    dg = get_mg_graph(dataset, directed=False)
+    parts, mod = dcg.ecg(
+        dg,
+        min_weight=min_weight,
+        ensemble_size=ensemble_size,
+        max_level=max_level,
+        threshold=threshold,
+        resolution=resolution,
+        random_state=random_state,
+    )
+
+    golden_score = golden_call(dataset.metadata["name"])
+    assert mod > (0.80 * golden_score)
+
+    # FIXME: compare the partition against Nx or hardcoded golden results; for
+    # now only materialize it so errors in the distributed result surface.
+    parts.compute()

From 92bfb5771ef78d37fead4de4c55c9e13fa2efd0f Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 16 Jan 2025 08:30:37 -0800
Subject: [PATCH 52/60] pass different random seeds to each GPU

---
 python/cugraph/cugraph/dask/community/ecg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cugraph/cugraph/dask/community/ecg.py b/python/cugraph/cugraph/dask/community/ecg.py
index 3ed9947783..63609a7ca1 100644
--- a/python/cugraph/cugraph/dask/community/ecg.py
+++ b/python/cugraph/cugraph/dask/community/ecg.py
@@ -175,12 +175,12 @@ def ecg(
             max_level,
             threshold,
             resolution,
-            random_state,
+            (random_state + i) if random_state is not None else random_state,
             do_expensive_check,
             workers=[w],
             allow_other_workers=False,
         )
-        for w in Comms.get_workers()
+        for i, w in enumerate(Comms.get_workers())
     ]
 
     wait(result)

From 1282d52fdc188a47b803ed39f2292b5d4e2cc8cf Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 16 Jan 2025 08:31:03 -0800
Subject: [PATCH 53/60] add mg tests for biased random walks

---
 .../sampling/test_biased_random_walks_mg.py   | 212 ++++++++++++++++++
 .../sampling/test_uniform_random_walks_mg.py  | 212 ++++++++++++++++++
 2 files changed, 424 insertions(+)
 create mode 100644 python/cugraph/cugraph/tests/sampling/test_biased_random_walks_mg.py
 create mode 100644 python/cugraph/cugraph/tests/sampling/test_uniform_random_walks_mg.py

diff --git a/python/cugraph/cugraph/tests/sampling/test_biased_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks_mg.py
new file mode 100644
index 0000000000..db51e6ca79
--- /dev/null
+++ b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks_mg.py
@@ -0,0 +1,212 @@
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import random
+
+import pytest
+
+import cugraph
+import dask_cudf
+import cugraph.dask as dcg
+import cudf
+from cugraph.testing import SMALL_DATASETS
+from cugraph.datasets import karate_asymmetric
+from cugraph.structure.symmetrize import symmetrize
+from pylibcugraph.testing.utils import gen_fixture_params_product
+
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+
+
+def setup_function():
+    gc.collect()  # pytest per-test hook: run garbage collection before each test
+
+
+IS_DIRECTED = [True, False]
+
+
+# =============================================================================
+# Pytest fixtures
+# =============================================================================
+
+datasets = SMALL_DATASETS + [karate_asymmetric]
+
+fixture_params = gen_fixture_params_product(
+    (datasets, "graph_file"),
+    (IS_DIRECTED, "directed"),
+)
+
+
+def calc_biased_random_walks(G):
+    """
+    Run biased random walks from randomly sampled start vertices.
+
+    The number of start vertices (1 to 4) and the maximum depth (2 to 4) are
+    drawn with the ``random`` module; nothing is seeded here.
+
+    Parameters
+    ----------
+    G : cugraph.Graph
+        Graph to walk on. ``G.nodes().compute()`` must yield an object that
+        supports ``sample``.
+
+    Returns
+    -------
+    path_data : tuple
+        ``(vertex_paths, edge_weights, max_path_length)`` as returned by
+        ``dcg.biased_random_walks``.
+
+    start_vertices
+        The sampled start vertices, with their index reset.
+
+    max_depth : int
+        The maximum depth requested for the walks.
+    """
+    num_seeds = random.randint(1, 4)
+    max_depth = random.randint(2, 4)
+
+    vertices = G.nodes().compute()
+    start_vertices = vertices.sample(num_seeds).reset_index(drop=True)
+
+    # Unpacking requires that exactly three results come back.
+    vertex_paths, edge_weights, max_path_length = dcg.biased_random_walks(
+        G, start_vertices, max_depth
+    )
+
+    path_data = (vertex_paths, edge_weights, max_path_length)
+
+    return path_data, start_vertices, max_depth
+
+
+def check_biased_random_walks(G, path_data, seeds, max_depth, df_G=None):
+    """Assert each walk starts at a seed and only follows edges found in df_G."""
+    invalid_edge = 0
+    invalid_edge_wgt_path = 0
+    invalid_seeds = 0
+    next_path_idx = 0
+    e_wgt_path_idx = 0
+    v_paths = path_data[0].compute()
+    e_paths = path_data[1].compute()
+
+    max_path_length = path_data[2]
+    sizes = max_path_length
+
+    for _ in range(len(seeds)):
+        for i in range(next_path_idx, next_path_idx + sizes):
+            src, dst = v_paths.iloc[i], v_paths.iloc[i + 1]
+
+            if i == next_path_idx and src not in seeds.values:
+                invalid_seeds += 1
+                print("[ERR] Invalid seed: " " src {} != src {}".format(src, seeds))
+
+            else:
+                # The walk's first vertex is a valid seed, or this is a later
+                # hop: validate the edge src -> dst.
+
+                # A walk may only end (dst == -1) at a vertex without outgoing
+                # edges. src == -1 entries are handled by the outer else below.
+                if src != -1:
+                    src_degree = G.out_degree([src])["degree"].compute()[0]
+                    if dst == -1 and src_degree == 0:
+                        if e_paths.values[e_wgt_path_idx] != 0:
+                            wgt = e_paths.values[e_wgt_path_idx]
+                            print(
+                                "[ERR] Invalid edge weight path: "
+                                "Edge src {} dst {} has wgt 0 "
+                                "But got wgt {}".format(src, dst, wgt)
+                            )
+                            invalid_edge_wgt_path += 1
+                    else:
+                        exp_edge = df_G.loc[
+                            (df_G["src"] == (src)) & (df_G["dst"] == (dst))
+                        ].reset_index(drop=True)
+
+                        if len(exp_edge) == 0:
+                            print(
+                                "[ERR] Invalid edge: "
+                                "There is no edge src {} dst {}".format(src, dst)
+                            )
+                            invalid_edge += 1
+                        else:
+                            # The edge exists: its reported weight must be 1.
+                            if e_paths.values[e_wgt_path_idx] != 1:
+                                wgt = e_paths.values[e_wgt_path_idx]
+                                print(
+                                    "[ERR] Invalid edge weight path: "
+                                    "Edge src {} dst {} has wgt 1 "
+                                    "But got wgt {}".format(src, dst, wgt)
+                                )
+                                invalid_edge_wgt_path += 1
+                else:
+                    # src == -1: the matching edge weight entry must be 0.
+                    if e_paths.values[e_wgt_path_idx] != 0:
+                        wgt = e_paths.values[e_wgt_path_idx]
+                        print(
+                            "[ERR] Invalid edge weight path: "
+                            "Edge src {} dst {} has wgt 0 "
+                            "But got wgt {}".format(src, dst, wgt)
+                        )
+                        invalid_edge_wgt_path += 1
+
+            e_wgt_path_idx += 1
+        next_path_idx += sizes + 1
+
+    assert invalid_edge == 0
+    assert invalid_seeds == 0
+    assert invalid_edge_wgt_path == 0
+    assert max_path_length == max_depth
+
+
+@pytest.fixture(scope="module", params=fixture_params)
+def input_graph(request):
+    """
+    Build the dask-backed graph for the current (graph_file, directed)
+    parameter combination from the dataset's CSV edge list.
+    """
+    params = dict(zip(("graph_file", "directed"), request.param))
+    csv_path = params["graph_file"].get_path()
+    is_directed = params["directed"]
+
+    edgelist_ddf = dask_cudf.read_csv(
+        csv_path,
+        blocksize=dcg.get_chunksize(csv_path),
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
+
+    mg_graph = cugraph.Graph(directed=is_directed)
+    mg_graph.from_dask_cudf_edgelist(
+        edgelist_ddf,
+        source="src",
+        destination="dst",
+        edge_attr="value",
+        renumber=True,
+        store_transposed=True,
+    )
+
+    return mg_graph
+
+
+@pytest.mark.mg
+def test_dask_mg_biased_random_walks(dask_client, input_graph):
+    """Every sampled walk must follow edges of the decompressed edge list."""
+    path_data, seeds, max_depth = calc_biased_random_walks(input_graph)
+
+    edgelist = input_graph.decompress_to_edgelist(return_unrenumbered_edgelist=True)
+    df_G = edgelist.compute().reset_index(drop=True)
+
+    check_biased_random_walks(input_graph, path_data, seeds, max_depth, df_G)
diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks_mg.py
new file mode 100644
index 0000000000..50aeb0ec84
--- /dev/null
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks_mg.py
@@ -0,0 +1,212 @@
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import random
+
+import pytest
+
+import cugraph
+import dask_cudf
+import cugraph.dask as dcg
+import cudf
+from cugraph.testing import SMALL_DATASETS
+from cugraph.datasets import karate_asymmetric
+from cugraph.structure.symmetrize import symmetrize
+from pylibcugraph.testing.utils import gen_fixture_params_product
+
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+
+
+def setup_function():
+    gc.collect()  # pytest per-test hook: run garbage collection before each test
+
+
+IS_DIRECTED = [True, False]
+
+
+# =============================================================================
+# Pytest fixtures
+# =============================================================================
+
+datasets = SMALL_DATASETS + [karate_asymmetric]
+
+fixture_params = gen_fixture_params_product(
+    (datasets, "graph_file"),
+    (IS_DIRECTED, "directed"),
+)
+
+
+def calc_uniform_random_walks(G):
+    """
+    Run uniform random walks from randomly sampled start vertices.
+
+    The number of start vertices (1 to 4) and the maximum depth (2 to 4) are
+    drawn with the ``random`` module; nothing is seeded here.
+
+    Parameters
+    ----------
+    G : cugraph.Graph
+        Graph to walk on. ``G.nodes().compute()`` must yield an object that
+        supports ``sample``.
+
+    Returns
+    -------
+    path_data : tuple
+        ``(vertex_paths, edge_weights, max_path_length)`` as returned by
+        ``dcg.uniform_random_walks``.
+
+    start_vertices
+        The sampled start vertices, with their index reset.
+
+    max_depth : int
+        The maximum depth requested for the walks.
+    """
+    num_seeds = random.randint(1, 4)
+    max_depth = random.randint(2, 4)
+
+    vertices = G.nodes().compute()
+    start_vertices = vertices.sample(num_seeds).reset_index(drop=True)
+
+    # Unpacking requires that exactly three results come back.
+    vertex_paths, edge_weights, max_path_length = dcg.uniform_random_walks(
+        G, start_vertices, max_depth
+    )
+
+    path_data = (vertex_paths, edge_weights, max_path_length)
+
+    return path_data, start_vertices, max_depth
+
+
+def check_uniform_random_walks(G, path_data, seeds, max_depth, df_G=None):
+    """Assert each walk starts at a seed and only follows edges found in df_G."""
+    invalid_edge = 0
+    invalid_edge_wgt_path = 0
+    invalid_seeds = 0
+    next_path_idx = 0
+    e_wgt_path_idx = 0
+    v_paths = path_data[0].compute()
+    e_paths = path_data[1].compute()
+
+    max_path_length = path_data[2]
+    sizes = max_path_length
+
+    for _ in range(len(seeds)):
+        for i in range(next_path_idx, next_path_idx + sizes):
+            src, dst = v_paths.iloc[i], v_paths.iloc[i + 1]
+
+            if i == next_path_idx and src not in seeds.values:
+                invalid_seeds += 1
+                print("[ERR] Invalid seed: " " src {} != src {}".format(src, seeds))
+
+            else:
+                # The walk's first vertex is a valid seed, or this is a later
+                # hop: validate the edge src -> dst.
+
+                # A walk may only end (dst == -1) at a vertex without outgoing
+                # edges. src == -1 entries are handled by the outer else below.
+                if src != -1:
+                    src_degree = G.out_degree([src])["degree"].compute()[0]
+                    if dst == -1 and src_degree == 0:
+                        if e_paths.values[e_wgt_path_idx] != 0:
+                            wgt = e_paths.values[e_wgt_path_idx]
+                            print(
+                                "[ERR] Invalid edge weight path: "
+                                "Edge src {} dst {} has wgt 0 "
+                                "But got wgt {}".format(src, dst, wgt)
+                            )
+                            invalid_edge_wgt_path += 1
+                    else:
+                        exp_edge = df_G.loc[
+                            (df_G["src"] == (src)) & (df_G["dst"] == (dst))
+                        ].reset_index(drop=True)
+
+                        if len(exp_edge) == 0:
+                            print(
+                                "[ERR] Invalid edge: "
+                                "There is no edge src {} dst {}".format(src, dst)
+                            )
+                            invalid_edge += 1
+                        else:
+                            # The edge exists: its reported weight must be 1.
+                            if e_paths.values[e_wgt_path_idx] != 1:
+                                wgt = e_paths.values[e_wgt_path_idx]
+                                print(
+                                    "[ERR] Invalid edge weight path: "
+                                    "Edge src {} dst {} has wgt 1 "
+                                    "But got wgt {}".format(src, dst, wgt)
+                                )
+                                invalid_edge_wgt_path += 1
+                else:
+                    # src == -1: the matching edge weight entry must be 0.
+                    if e_paths.values[e_wgt_path_idx] != 0:
+                        wgt = e_paths.values[e_wgt_path_idx]
+                        print(
+                            "[ERR] Invalid edge weight path: "
+                            "Edge src {} dst {} has wgt 0 "
+                            "But got wgt {}".format(src, dst, wgt)
+                        )
+                        invalid_edge_wgt_path += 1
+
+            e_wgt_path_idx += 1
+        next_path_idx += sizes + 1
+
+    assert invalid_edge == 0
+    assert invalid_seeds == 0
+    assert invalid_edge_wgt_path == 0
+    assert max_path_length == max_depth
+
+
+@pytest.fixture(scope="module", params=fixture_params)
+def input_graph(request):
+    """
+    Build the dask-backed graph for the current (graph_file, directed)
+    parameter combination from the dataset's CSV edge list.
+    """
+    params = dict(zip(("graph_file", "directed"), request.param))
+    csv_path = params["graph_file"].get_path()
+    is_directed = params["directed"]
+
+    edgelist_ddf = dask_cudf.read_csv(
+        csv_path,
+        blocksize=dcg.get_chunksize(csv_path),
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
+
+    mg_graph = cugraph.Graph(directed=is_directed)
+    mg_graph.from_dask_cudf_edgelist(
+        edgelist_ddf,
+        source="src",
+        destination="dst",
+        edge_attr="value",
+        renumber=True,
+        store_transposed=True,
+    )
+
+    return mg_graph
+
+
+@pytest.mark.mg
+def test_dask_mg_uniform_random_walks(dask_client, input_graph):
+    """Every sampled walk must follow edges of the decompressed edge list."""
+    path_data, seeds, max_depth = calc_uniform_random_walks(input_graph)
+
+    edgelist = input_graph.decompress_to_edgelist(return_unrenumbered_edgelist=True)
+    df_G = edgelist.compute().reset_index(drop=True)
+
+    check_uniform_random_walks(input_graph, path_data, seeds, max_depth, df_G)

From 79d7b5cb4f5a8286e424e0c158524d816643f20a Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 16 Jan 2025 08:39:49 -0800
Subject: [PATCH 54/60] add node2vec mg tests

---
 .../sampling/test_node2vec_random_walks_mg.py | 212 ++++++++++++++++++
 1 file changed, 212 insertions(+)
 create mode 100644 python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks_mg.py

diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks_mg.py
new file mode 100644
index 0000000000..ad6b2022e7
--- /dev/null
+++ b/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks_mg.py
@@ -0,0 +1,212 @@
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import random
+
+import pytest
+
+import cugraph
+import dask_cudf
+import cugraph.dask as dcg
+import cudf
+from cugraph.testing import SMALL_DATASETS
+from cugraph.datasets import karate_asymmetric
+from cugraph.structure.symmetrize import symmetrize
+from pylibcugraph.testing.utils import gen_fixture_params_product
+
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+
+
+def setup_function():
+    gc.collect()  # pytest per-test hook: run garbage collection before each test
+
+
+IS_DIRECTED = [True, False]
+
+
+# =============================================================================
+# Pytest fixtures
+# =============================================================================
+
+datasets = SMALL_DATASETS + [karate_asymmetric]
+
+fixture_params = gen_fixture_params_product(
+    (datasets, "graph_file"),
+    (IS_DIRECTED, "directed"),
+)
+
+
+def calc_node2vec_random_walks(G):
+    """
+    Run node2vec random walks from randomly sampled start vertices.
+
+    The number of start vertices (1 to 4) and the maximum depth (2 to 4) are
+    drawn with the ``random`` module; nothing is seeded here.
+
+    Parameters
+    ----------
+    G : cugraph.Graph
+        Graph to walk on. ``G.nodes().compute()`` must yield an object that
+        supports ``sample``.
+
+    Returns
+    -------
+    path_data : tuple
+        ``(vertex_paths, edge_weights, max_path_length)`` as returned by
+        ``dcg.node2vec_random_walks``.
+
+    start_vertices
+        The sampled start vertices, with their index reset.
+
+    max_depth : int
+        The maximum depth requested for the walks.
+    """
+    num_seeds = random.randint(1, 4)
+    max_depth = random.randint(2, 4)
+
+    vertices = G.nodes().compute()
+    start_vertices = vertices.sample(num_seeds).reset_index(drop=True)
+
+    # Unpacking requires that exactly three results come back.
+    vertex_paths, edge_weights, max_path_length = dcg.node2vec_random_walks(
+        G, start_vertices, max_depth
+    )
+
+    path_data = (vertex_paths, edge_weights, max_path_length)
+
+    return path_data, start_vertices, max_depth
+
+
+def check_node2vec_random_walks(G, path_data, seeds, max_depth, df_G=None):
+    """Assert each walk starts at a seed and only follows edges found in df_G."""
+    invalid_edge = 0
+    invalid_edge_wgt_path = 0
+    invalid_seeds = 0
+    next_path_idx = 0
+    e_wgt_path_idx = 0
+    v_paths = path_data[0].compute()
+    e_paths = path_data[1].compute()
+
+    max_path_length = path_data[2]
+    sizes = max_path_length
+
+    for _ in range(len(seeds)):
+        for i in range(next_path_idx, next_path_idx + sizes):
+            src, dst = v_paths.iloc[i], v_paths.iloc[i + 1]
+
+            if i == next_path_idx and src not in seeds.values:
+                invalid_seeds += 1
+                print("[ERR] Invalid seed: " " src {} != src {}".format(src, seeds))
+
+            else:
+                # The walk's first vertex is a valid seed, or this is a later
+                # hop: validate the edge src -> dst.
+
+                # A walk may only end (dst == -1) at a vertex without outgoing
+                # edges. src == -1 entries are handled by the outer else below.
+                if src != -1:
+                    src_degree = G.out_degree([src])["degree"].compute()[0]
+                    if dst == -1 and src_degree == 0:
+                        if e_paths.values[e_wgt_path_idx] != 0:
+                            wgt = e_paths.values[e_wgt_path_idx]
+                            print(
+                                "[ERR] Invalid edge weight path: "
+                                "Edge src {} dst {} has wgt 0 "
+                                "But got wgt {}".format(src, dst, wgt)
+                            )
+                            invalid_edge_wgt_path += 1
+                    else:
+                        exp_edge = df_G.loc[
+                            (df_G["src"] == (src)) & (df_G["dst"] == (dst))
+                        ].reset_index(drop=True)
+
+                        if len(exp_edge) == 0:
+                            print(
+                                "[ERR] Invalid edge: "
+                                "There is no edge src {} dst {}".format(src, dst)
+                            )
+                            invalid_edge += 1
+                        else:
+                            # The edge exists: its reported weight must be 1.
+                            if e_paths.values[e_wgt_path_idx] != 1:
+                                wgt = e_paths.values[e_wgt_path_idx]
+                                print(
+                                    "[ERR] Invalid edge weight path: "
+                                    "Edge src {} dst {} has wgt 1 "
+                                    "But got wgt {}".format(src, dst, wgt)
+                                )
+                                invalid_edge_wgt_path += 1
+                else:
+                    # src == -1: the matching edge weight entry must be 0.
+                    if e_paths.values[e_wgt_path_idx] != 0:
+                        wgt = e_paths.values[e_wgt_path_idx]
+                        print(
+                            "[ERR] Invalid edge weight path: "
+                            "Edge src {} dst {} has wgt 0 "
+                            "But got wgt {}".format(src, dst, wgt)
+                        )
+                        invalid_edge_wgt_path += 1
+
+            e_wgt_path_idx += 1
+        next_path_idx += sizes + 1
+
+    assert invalid_edge == 0
+    assert invalid_seeds == 0
+    assert invalid_edge_wgt_path == 0
+    assert max_path_length == max_depth
+
+
+@pytest.fixture(scope="module", params=fixture_params)
+def input_graph(request):
+    """
+    Build the dask-backed graph for the current (graph_file, directed)
+    parameter combination from the dataset's CSV edge list.
+    """
+    params = dict(zip(("graph_file", "directed"), request.param))
+    csv_path = params["graph_file"].get_path()
+    is_directed = params["directed"]
+
+    edgelist_ddf = dask_cudf.read_csv(
+        csv_path,
+        blocksize=dcg.get_chunksize(csv_path),
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
+
+    mg_graph = cugraph.Graph(directed=is_directed)
+    mg_graph.from_dask_cudf_edgelist(
+        edgelist_ddf,
+        source="src",
+        destination="dst",
+        edge_attr="value",
+        renumber=True,
+        store_transposed=True,
+    )
+
+    return mg_graph
+
+
+@pytest.mark.mg
+def test_dask_mg_node2vec_random_walks(dask_client, input_graph):
+    """Every sampled walk must follow edges of the decompressed edge list."""
+    path_data, seeds, max_depth = calc_node2vec_random_walks(input_graph)
+
+    edgelist = input_graph.decompress_to_edgelist(return_unrenumbered_edgelist=True)
+    df_G = edgelist.compute().reset_index(drop=True)
+
+    check_node2vec_random_walks(input_graph, path_data, seeds, max_depth, df_G)

From b22d4c8590bfed11bb180a76a0e34ad634c92199 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 16 Jan 2025 08:50:46 -0800
Subject: [PATCH 55/60] add sg node2vec tests

---
 .../sampling/test_biased_random_walks.py      |   2 +-
 .../sampling/test_node2vec_random_walks.py    | 274 ++++++++++++++++++
 .../sampling/test_uniform_random_walks.py     |   2 +-
 3 files changed, 276 insertions(+), 2 deletions(-)
 create mode 100644 python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks.py

diff --git a/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py
index b6097aa1ed..cbd4b48329 100644
--- a/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py
@@ -219,7 +219,7 @@ def test_biased_random_walks(graph_file, directed):
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-def test_random_walks_multi_column_seeds(
+def test_biased_random_walks_multi_column_seeds(
     graph_file,
     directed
 ):
diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks.py
new file mode 100644
index 0000000000..de46ec72eb
--- /dev/null
+++ b/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks.py
@@ -0,0 +1,274 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import random
+
+import pytest
+import networkx as nx
+
+import cudf
+import cugraph
+from cudf.testing import assert_series_equal
+from cudf.testing.testing import assert_frame_equal
+from cugraph.utilities import ensure_cugraph_obj_for_nx
+from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils
+
+
+# =============================================================================
+# Parameters
+# =============================================================================
+DIRECTED_GRAPH_OPTIONS = [False, True]
+WEIGHTED_GRAPH_OPTIONS = [False, True]
+DATASETS = [pytest.param(d) for d in DEFAULT_DATASETS]
+SMALL_DATASETS = [pytest.param(d) for d in SMALL_DATASETS]
+
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
+def calc_node2vec_random_walks(G, max_depth=None):
+    """
+    Compute random walks for each node in 'start_vertices'
+
+    parameters
+    ----------
+    G : cuGraph.Graph or networkx.Graph
+        The graph can be either directed or undirected.
+        Weights in the graph are ignored.
+        Use weight parameter if weights need to be considered
+        (currently not supported)
+
+    max_depth : int
+        The maximum depth of the random walks
+
+    Returns
+    -------
+    vertex_paths : cudf.Series or cudf.DataFrame
+        Series containing the vertices of edges/paths in the random walk.
+
+    edge_weight_paths: cudf.Series
+        Series containing the edge weights of edges represented by the
+        returned vertex_paths
+
+    sizes: int
+        The path size in case of coalesced paths.
+    """
+    assert G is not None
+
+    G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt")
+
+    k = random.randint(1, 6)
+
+    random_walks_type = "node2vec"
+
+    start_vertices = G.select_random_vertices(num_vertices=k)
+
+    print("\nstart_vertices is \n", start_vertices)
+    vertex_paths, edge_weights, vertex_path_sizes = cugraph.node2vec_random_walks(
+        G, start_vertices, max_depth
+    )
+
+    return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices
+
+
+
+
+def check_node2vec_random_walks(G, path_data, seeds, max_depth):
+    invalid_edge = 0
+    invalid_seeds = 0
+    invalid_edge_wgt = 0
+    v_paths = path_data[0]
+    e_wgt_paths = path_data[1]
+    e_wgt_idx = 0
+
+    G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt")
+    df_G = G.input_df
+
+    if "weight" in df_G.columns:
+        df_G = df_G.rename(columns={"weight": "wgt"})
+
+    total_depth = (max_depth) * len(seeds)
+
+    for i in range(total_depth):
+        if isinstance(seeds, cudf.DataFrame):
+            vertex_1 = v_paths.iloc[[i]].reset_index(drop=True)
+            vertex_2 = v_paths.iloc[[i + 1]].reset_index(drop=True)
+        else:
+            vertex_1, vertex_2 = v_paths.iloc[i], v_paths.iloc[i + 1]
+
+        # Every (max_depth + 1)'th vertex in 'v_paths' is a seed. Instead of
+        # indexing 'seeds[i // (max_depth + 1)]', we could pop the first element
+        # of the seeds array on each match and compare it to 'vertex_1'.
+
+        if i % (max_depth + 1) == 0:
+            if isinstance(seeds, cudf.DataFrame):
+                assert_frame_equal(
+                    vertex_1.rename(columns={x:y for x,y in zip(vertex_1.columns,range(0,len(vertex_1.columns)))}),
+                    seeds.iloc[[i // (max_depth + 1)]].reset_index(drop=True).rename(columns={x:y for x,y in zip(seeds.columns,range(0,len(seeds.columns)))}),
+                    check_dtype=False, check_like=True)
+            else:
+                if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
+                    invalid_seeds += 1
+                    print(
+                        "[ERR] Invalid seed: "
+                        " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
+                )
+
+        if (i % (max_depth + 1)) != (max_depth):
+            # These are the edges
+            src = vertex_1
+            dst = vertex_2
+            
+            # check for valid edge.
+            if isinstance(seeds, cudf.DataFrame):
+                if (-1 not in src.iloc[0].reset_index(drop=True)) and (-1 not in dst.iloc[0].reset_index(drop=True)):
+                    edge = cudf.DataFrame()
+                    edge["src"] = vertex_1["0_vertex_paths"]
+                    edge["src_0"] = vertex_1["1_vertex_paths"]
+                    edge["dst"] = vertex_2["0_vertex_paths"]
+                    edge["dst_0"] = vertex_2["1_vertex_paths"]
+
+                    join1 = cudf.merge(df_G, edge, on=[*edge.columns])
+                    
+                    assert len(cudf.merge(df_G, edge, on=[*edge.columns])) > 0
+            else:
+                edge = df_G.loc[
+                    (df_G["src"] == (src)) & (df_G["dst"] == (dst))
+                ].reset_index(drop=True)
+
+                if len(edge) == 0:
+                    print(
+                        "[ERR] Invalid edge: "
+                        "There is no edge src {} dst {}".format(src, dst)
+                    )
+                    invalid_edge += 1
+
+                else:
+                    # check valid edge wgt
+                    if G.is_weighted():
+                        expected_wgt = edge["wgt"].iloc[0]
+                        result_wgt = e_wgt_paths.iloc[e_wgt_idx]
+
+                        if expected_wgt != result_wgt:
+                            print(
+                                "[ERR] Invalid edge wgt: "
+                                "The edge src {} dst {} has wgt {} but got {}".format(
+                                    src, dst, expected_wgt, result_wgt
+                                )
+                            )
+                            invalid_edge_wgt += 1
+                e_wgt_idx += 1
+
+                if src != -1 and dst == -1:
+                    # ensure there is no outgoing edges from 'src'
+                    assert G.out_degree([src])["degree"].iloc[0] == 0
+
+    assert invalid_seeds == 0
+    assert invalid_edge == 0
+    assert len(v_paths) == (max_depth + 1) * len(seeds)
+    if G.is_weighted():
+        assert invalid_edge_wgt == 0
+        assert len(e_wgt_paths) == (max_depth) * len(seeds)
+
+    
+    max_path_lenth = path_data[2]
+    assert max_path_lenth == max_depth
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("max_depth", [None])
+def test_node2vec_random_walks_invalid_max_dept(graph_file, directed, max_depth):
+
+    input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
+    with pytest.raises(ValueError):
+        _, _, _ = calc_node2vec_random_walks(input_graph, max_depth=max_depth)
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_node2vec_random_walks(graph_file, directed):
+    max_depth = random.randint(2, 10)
+    print("max_depth is ", max_depth)
+    input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
+
+    path_data, seeds = calc_node2vec_random_walks(
+        input_graph, max_depth=max_depth
+    )
+
+    check_node2vec_random_walks(input_graph, path_data, seeds, max_depth)
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+def test_node2vec_random_walks_nx(graph_file):
+    G = graph_file.get_graph(create_using=cugraph.Graph(directed=True))
+
+    M = G.to_pandas_edgelist()
+
+    source = G.source_columns
+    target = G.destination_columns
+    edge_attr = G.weight_column
+
+    Gnx = nx.from_pandas_edgelist(
+        M,
+        source=source,
+        target=target,
+        edge_attr=edge_attr,
+        create_using=nx.DiGraph(),
+    )
+    max_depth = random.randint(2, 10)
+    path_data, seeds = calc_node2vec_random_walks(Gnx, max_depth=max_depth)
+
+    check_node2vec_random_walks(Gnx, path_data, seeds, max_depth)
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_node2vec_random_walks_multi_column_seeds(
+    graph_file,
+    directed
+):
+    max_depth = random.randint(2, 10)
+    df_G = graph_file.get_edgelist()
+    df_G.rename(
+        columns={"wgt": "weight"}, inplace=True)
+    df_G['src_0'] = df_G['src'] + 1000
+    df_G['dst_0'] = df_G['dst'] + 1000
+
+    if directed:
+        G = cugraph.Graph(directed=True)
+    else:
+        G = cugraph.Graph()
+    G.from_cudf_edgelist(df_G, source=['src', 'src_0'],
+                         destination=['dst', 'dst_0'],
+                         edge_attr="weight")
+
+    k = random.randint(1, 10)
+
+    seeds = G.select_random_vertices(num_vertices=k)
+    vertex_paths, edge_weights, vertex_path_sizes = cugraph.node2vec_random_walks(
+        G, seeds, max_depth)
+    
+    path_data = (vertex_paths, edge_weights, vertex_path_sizes)
+    
+    check_node2vec_random_walks(G, path_data, seeds, max_depth)
+    
\ No newline at end of file
diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
index 7fca738575..ef2fc00af1 100644
--- a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
@@ -243,7 +243,7 @@ def test_uniform_random_walks_nx(graph_file):
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-def test_random_walks_multi_column_seeds(
+def test_uniform_random_walks_multi_column_seeds(
     graph_file,
     directed
 ):

From c1f6ff0593afbdc8896e134f05a3a444ee0aec0c Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 16 Jan 2025 09:06:25 -0800
Subject: [PATCH 56/60] deprecate tests

---
 python/cugraph/cugraph/tests/sampling/test_node2vec.py        | 2 ++
 python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec.py b/python/cugraph/cugraph/tests/sampling/test_node2vec.py
index 00c3270533..060a3015a7 100644
--- a/python/cugraph/cugraph/tests/sampling/test_node2vec.py
+++ b/python/cugraph/cugraph/tests/sampling/test_node2vec.py
@@ -48,6 +48,8 @@ def _get_param_args(param_name, param_values):
     return (param_name, [pytest.param(v, id=f"{param_name}={v}") for v in param_values])
 
 
+# FIXME: This test suite must be removed once node2vec is removed from
+# the python API in favor of node2vec random walks
 def calc_node2vec(G, start_vertices, max_depth, compress_result, p=1.0, q=1.0):
     """
     Compute node2vec for each nodes in 'start_vertices'
diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py
index 96b34c638b..033e6760e0 100644
--- a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py
+++ b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py
@@ -50,6 +50,8 @@ def setup_function():
 )
 
 
+# FIXME: This test suite must be removed once random_walks is removed from
+# the python API in favor of uniform random walks
 def calc_random_walks(G):
     """
     compute random walks

From 75e1253b27c6a50d817c0efb8b0ddc83ace730bb Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 16 Jan 2025 10:37:35 -0800
Subject: [PATCH 57/60] fix style

---
 .../cugraph/dask/community/__init__.py        |  2 +-
 python/cugraph/cugraph/dask/community/ecg.py  |  9 +--
 .../cugraph/sampling/node2vec_random_walks.py |  2 +-
 .../graph_implementation/simpleGraph.py       |  2 +-
 .../cugraph/tests/community/test_ecg_mg.py    | 17 ++++-
 .../sampling/test_biased_random_walks.py      | 75 ++++++++++---------
 .../sampling/test_biased_random_walks_mg.py   | 11 +--
 .../cugraph/tests/sampling/test_node2vec.py   |  2 +-
 .../sampling/test_node2vec_random_walks.py    | 74 +++++++++---------
 .../sampling/test_node2vec_random_walks_mg.py | 11 +--
 .../tests/sampling/test_random_walks_mg.py    |  2 +-
 .../sampling/test_uniform_random_walks.py     | 74 +++++++++---------
 .../sampling/test_uniform_random_walks_mg.py  | 11 +--
 13 files changed, 159 insertions(+), 133 deletions(-)

diff --git a/python/cugraph/cugraph/dask/community/__init__.py b/python/cugraph/cugraph/dask/community/__init__.py
index 146e837bd8..4d848385c4 100644
--- a/python/cugraph/cugraph/dask/community/__init__.py
+++ b/python/cugraph/cugraph/dask/community/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/dask/community/ecg.py b/python/cugraph/cugraph/dask/community/ecg.py
index 63609a7ca1..6f9f716402 100644
--- a/python/cugraph/cugraph/dask/community/ecg.py
+++ b/python/cugraph/cugraph/dask/community/ecg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -26,10 +26,7 @@
 from pylibcugraph import ecg as pylibcugraph_ecg
 import numpy
 import cupy as cp
-from typing import Tuple, TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from cugraph import Graph
+from typing import Tuple
 
 
 def convert_to_cudf(result: cp.ndarray) -> Tuple[cudf.DataFrame, float]:
@@ -75,7 +72,7 @@ def ecg(
     max_level: int = 10,
     threshold: float = 1e-7,
     resolution: float = 1.0,
-    random_state: int = None
+    random_state: int = None,
 ) -> Tuple[dask_cudf.DataFrame, float]:
     """
     Compute the Ensemble Clustering for Graphs (ECG) partition of the input
diff --git a/python/cugraph/cugraph/sampling/node2vec_random_walks.py b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
index 68e9f3f072..5e7c352534 100644
--- a/python/cugraph/cugraph/sampling/node2vec_random_walks.py
+++ b/python/cugraph/cugraph/sampling/node2vec_random_walks.py
@@ -161,5 +161,5 @@ def node2vec_random_walks(
             vertex_paths = df_.fillna(-1)
         else:
             vertex_paths = cudf.Series(df_["vertex_paths"]).fillna(-1)
-    
+
     return vertex_paths, edge_wgt_paths, max_depth
diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py
index d9beba70e4..8086c9ddb4 100644
--- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py
+++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/tests/community/test_ecg_mg.py b/python/cugraph/cugraph/tests/community/test_ecg_mg.py
index 356c812fea..18f4cc4df3 100644
--- a/python/cugraph/cugraph/tests/community/test_ecg_mg.py
+++ b/python/cugraph/cugraph/tests/community/test_ecg_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -77,7 +77,15 @@ def golden_call(filename):
 @pytest.mark.parametrize("resolution", RESOLUTIONS)
 @pytest.mark.parametrize("random_state", RANDOM_STATES)
 def test_mg_ecg(
-    dask_client, dataset, min_weight, ensemble_size, max_level, threshold, resolution, random_state):
+    dask_client,
+    dataset,
+    min_weight,
+    ensemble_size,
+    max_level,
+    threshold,
+    resolution,
+    random_state,
+):
     filename = dataset.metadata["name"]
     dg = get_mg_graph(dataset, directed=False)
     parts, mod = dcg.ecg(
@@ -87,7 +95,8 @@ def test_mg_ecg(
         max_level=max_level,
         threshold=threshold,
         resolution=resolution,
-        random_state=random_state)
+        random_state=random_state,
+    )
 
     filename = dataset.metadata["name"]
     golden_score = golden_call(filename)
@@ -96,7 +105,7 @@ def test_mg_ecg(
     # assignment
     assert mod > (0.80 * golden_score)
 
-    #print("mod score = ", mod)
+    # print("mod score = ", mod)
 
     # FIXME: either call Nx with the same dataset and compare results, or
     # hardcode golden results to compare to.
diff --git a/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py
index cbd4b48329..4c12f981b1 100644
--- a/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks.py
@@ -15,14 +15,12 @@
 import random
 
 import pytest
-import networkx as nx
 
 import cudf
 import cugraph
-from cudf.testing import assert_series_equal
 from cudf.testing.testing import assert_frame_equal
 from cugraph.utilities import ensure_cugraph_obj_for_nx
-from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils
+from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS
 
 
 # =============================================================================
@@ -74,8 +72,6 @@ def calc_biased_random_walks(G, max_depth=None):
 
     k = random.randint(1, 6)
 
-    random_walks_type = "biased"
-
     start_vertices = G.select_random_vertices(num_vertices=k)
 
     print("\nstart_vertices is \n", start_vertices)
@@ -86,8 +82,6 @@ def calc_biased_random_walks(G, max_depth=None):
     return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices
 
 
-
-
 def check_biased_random_walks(G, path_data, seeds, max_depth):
     invalid_edge = 0
     invalid_seeds = 0
@@ -118,33 +112,51 @@ def check_biased_random_walks(G, path_data, seeds, max_depth):
         if i % (max_depth + 1) == 0:
             if isinstance(seeds, cudf.DataFrame):
                 assert_frame_equal(
-                    vertex_1.rename(columns={x:y for x,y in zip(vertex_1.columns,range(0,len(vertex_1.columns)))}),
-                    seeds.iloc[[i // (max_depth + 1)]].reset_index(drop=True).rename(columns={x:y for x,y in zip(seeds.columns,range(0,len(seeds.columns)))}),
-                    check_dtype=False, check_like=True)
+                    vertex_1.rename(
+                        columns={
+                            x: y
+                            for x, y in zip(
+                                vertex_1.columns, range(0, len(vertex_1.columns))
+                            )
+                        }
+                    ),
+                    seeds.iloc[[i // (max_depth + 1)]]
+                    .reset_index(drop=True)
+                    .rename(
+                        columns={
+                            x: y
+                            for x, y in zip(seeds.columns, range(0, len(seeds.columns)))
+                        }
+                    ),
+                    check_dtype=False,
+                    check_like=True,
+                )
             else:
                 if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
                     invalid_seeds += 1
                     print(
                         "[ERR] Invalid seed: "
-                        " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
-                )
+                        " src {} != src {}".format(
+                            vertex_1, seeds[i // (max_depth + 1)]
+                        )
+                    )
 
         if (i % (max_depth + 1)) != (max_depth):
             # These are the edges
             src = vertex_1
             dst = vertex_2
-            
+
             # check for valid edge.
             if isinstance(seeds, cudf.DataFrame):
-                if (-1 not in src.iloc[0].reset_index(drop=True)) and (-1 not in dst.iloc[0].reset_index(drop=True)):
+                if (-1 not in src.iloc[0].reset_index(drop=True)) and (
+                    -1 not in dst.iloc[0].reset_index(drop=True)
+                ):
                     edge = cudf.DataFrame()
                     edge["src"] = vertex_1["0_vertex_paths"]
                     edge["src_0"] = vertex_1["1_vertex_paths"]
                     edge["dst"] = vertex_2["0_vertex_paths"]
                     edge["dst_0"] = vertex_2["1_vertex_paths"]
 
-                    join1 = cudf.merge(df_G, edge, on=[*edge.columns])
-                    
                     assert len(cudf.merge(df_G, edge, on=[*edge.columns])) > 0
             else:
                 edge = df_G.loc[
@@ -185,7 +197,6 @@ def check_biased_random_walks(G, path_data, seeds, max_depth):
         assert invalid_edge_wgt == 0
         assert len(e_wgt_paths) == (max_depth) * len(seeds)
 
-    
     max_path_lenth = path_data[2]
     assert max_path_lenth == max_depth
 
@@ -209,9 +220,7 @@ def test_biased_random_walks(graph_file, directed):
     print("max_depth is ", max_depth)
     input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
 
-    path_data, seeds = calc_biased_random_walks(
-        input_graph, max_depth=max_depth
-    )
+    path_data, seeds = calc_biased_random_walks(input_graph, max_depth=max_depth)
 
     check_biased_random_walks(input_graph, path_data, seeds, max_depth)
 
@@ -219,32 +228,28 @@ def test_biased_random_walks(graph_file, directed):
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-def test_biased_random_walks_multi_column_seeds(
-    graph_file,
-    directed
-):
+def test_biased_random_walks_multi_column_seeds(graph_file, directed):
     max_depth = random.randint(2, 10)
     df_G = graph_file.get_edgelist()
-    df_G.rename(
-        columns={"wgt": "weight"}, inplace=True)
-    df_G['src_0'] = df_G['src'] + 1000
-    df_G['dst_0'] = df_G['dst'] + 1000
+    df_G.rename(columns={"wgt": "weight"}, inplace=True)
+    df_G["src_0"] = df_G["src"] + 1000
+    df_G["dst_0"] = df_G["dst"] + 1000
 
     if directed:
         G = cugraph.Graph(directed=True)
     else:
         G = cugraph.Graph()
-    G.from_cudf_edgelist(df_G, source=['src', 'src_0'],
-                         destination=['dst', 'dst_0'],
-                         edge_attr="weight")
+    G.from_cudf_edgelist(
+        df_G, source=["src", "src_0"], destination=["dst", "dst_0"], edge_attr="weight"
+    )
 
     k = random.randint(1, 10)
 
     seeds = G.select_random_vertices(num_vertices=k)
     vertex_paths, edge_weights, vertex_path_sizes = cugraph.biased_random_walks(
-        G, seeds, max_depth)
-    
+        G, seeds, max_depth
+    )
+
     path_data = (vertex_paths, edge_weights, vertex_path_sizes)
-    
+
     check_biased_random_walks(G, path_data, seeds, max_depth)
-    
\ No newline at end of file
diff --git a/python/cugraph/cugraph/tests/sampling/test_biased_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks_mg.py
index db51e6ca79..5d4c8d445c 100644
--- a/python/cugraph/cugraph/tests/sampling/test_biased_random_walks_mg.py
+++ b/python/cugraph/cugraph/tests/sampling/test_biased_random_walks_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,10 +19,8 @@
 import cugraph
 import dask_cudf
 import cugraph.dask as dcg
-import cudf
 from cugraph.testing import SMALL_DATASETS
 from cugraph.datasets import karate_asymmetric
-from cugraph.structure.symmetrize import symmetrize
 from pylibcugraph.testing.utils import gen_fixture_params_product
 
 
@@ -206,7 +204,10 @@ def test_dask_mg_biased_random_walks(dask_client, input_graph):
     path_data, seeds, max_depth = calc_biased_random_walks(input_graph)
     df_G = input_graph.input_df.compute().reset_index(drop=True)
 
-    df_G = input_graph.decompress_to_edgelist(
-        return_unrenumbered_edgelist=True).compute().reset_index(drop=True)
+    df_G = (
+        input_graph.decompress_to_edgelist(return_unrenumbered_edgelist=True)
+        .compute()
+        .reset_index(drop=True)
+    )
 
     check_biased_random_walks(input_graph, path_data, seeds, max_depth, df_G)
diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec.py b/python/cugraph/cugraph/tests/sampling/test_node2vec.py
index 060a3015a7..92656d7b7d 100644
--- a/python/cugraph/cugraph/tests/sampling/test_node2vec.py
+++ b/python/cugraph/cugraph/tests/sampling/test_node2vec.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks.py
index de46ec72eb..cb98087c2f 100644
--- a/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks.py
@@ -19,10 +19,9 @@
 
 import cudf
 import cugraph
-from cudf.testing import assert_series_equal
 from cudf.testing.testing import assert_frame_equal
 from cugraph.utilities import ensure_cugraph_obj_for_nx
-from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils
+from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS
 
 
 # =============================================================================
@@ -74,8 +73,6 @@ def calc_node2vec_random_walks(G, max_depth=None):
 
     k = random.randint(1, 6)
 
-    random_walks_type = "node2vec"
-
     start_vertices = G.select_random_vertices(num_vertices=k)
 
     print("\nstart_vertices is \n", start_vertices)
@@ -86,8 +83,6 @@ def calc_node2vec_random_walks(G, max_depth=None):
     return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices
 
 
-
-
 def check_node2vec_random_walks(G, path_data, seeds, max_depth):
     invalid_edge = 0
     invalid_seeds = 0
@@ -118,33 +113,51 @@ def check_node2vec_random_walks(G, path_data, seeds, max_depth):
         if i % (max_depth + 1) == 0:
             if isinstance(seeds, cudf.DataFrame):
                 assert_frame_equal(
-                    vertex_1.rename(columns={x:y for x,y in zip(vertex_1.columns,range(0,len(vertex_1.columns)))}),
-                    seeds.iloc[[i // (max_depth + 1)]].reset_index(drop=True).rename(columns={x:y for x,y in zip(seeds.columns,range(0,len(seeds.columns)))}),
-                    check_dtype=False, check_like=True)
+                    vertex_1.rename(
+                        columns={
+                            x: y
+                            for x, y in zip(
+                                vertex_1.columns, range(0, len(vertex_1.columns))
+                            )
+                        }
+                    ),
+                    seeds.iloc[[i // (max_depth + 1)]]
+                    .reset_index(drop=True)
+                    .rename(
+                        columns={
+                            x: y
+                            for x, y in zip(seeds.columns, range(0, len(seeds.columns)))
+                        }
+                    ),
+                    check_dtype=False,
+                    check_like=True,
+                )
             else:
                 if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
                     invalid_seeds += 1
                     print(
                         "[ERR] Invalid seed: "
-                        " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
-                )
+                        " src {} != src {}".format(
+                            vertex_1, seeds[i // (max_depth + 1)]
+                        )
+                    )
 
         if (i % (max_depth + 1)) != (max_depth):
             # These are the edges
             src = vertex_1
             dst = vertex_2
-            
+
             # check for valid edge.
             if isinstance(seeds, cudf.DataFrame):
-                if (-1 not in src.iloc[0].reset_index(drop=True)) and (-1 not in dst.iloc[0].reset_index(drop=True)):
+                if (-1 not in src.iloc[0].reset_index(drop=True)) and (
+                    -1 not in dst.iloc[0].reset_index(drop=True)
+                ):
                     edge = cudf.DataFrame()
                     edge["src"] = vertex_1["0_vertex_paths"]
                     edge["src_0"] = vertex_1["1_vertex_paths"]
                     edge["dst"] = vertex_2["0_vertex_paths"]
                     edge["dst_0"] = vertex_2["1_vertex_paths"]
 
-                    join1 = cudf.merge(df_G, edge, on=[*edge.columns])
-                    
                     assert len(cudf.merge(df_G, edge, on=[*edge.columns])) > 0
             else:
                 edge = df_G.loc[
@@ -185,7 +198,6 @@ def check_node2vec_random_walks(G, path_data, seeds, max_depth):
         assert invalid_edge_wgt == 0
         assert len(e_wgt_paths) == (max_depth) * len(seeds)
 
-    
     max_path_lenth = path_data[2]
     assert max_path_lenth == max_depth
 
@@ -209,9 +221,7 @@ def test_node2vec_random_walks(graph_file, directed):
     print("max_depth is ", max_depth)
     input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
 
-    path_data, seeds = calc_node2vec_random_walks(
-        input_graph, max_depth=max_depth
-    )
+    path_data, seeds = calc_node2vec_random_walks(input_graph, max_depth=max_depth)
 
     check_node2vec_random_walks(input_graph, path_data, seeds, max_depth)
 
@@ -243,32 +253,28 @@ def test_node2vec_random_walks_nx(graph_file):
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-def test_node2vec_random_walks_multi_column_seeds(
-    graph_file,
-    directed
-):
+def test_node2vec_random_walks_multi_column_seeds(graph_file, directed):
     max_depth = random.randint(2, 10)
     df_G = graph_file.get_edgelist()
-    df_G.rename(
-        columns={"wgt": "weight"}, inplace=True)
-    df_G['src_0'] = df_G['src'] + 1000
-    df_G['dst_0'] = df_G['dst'] + 1000
+    df_G.rename(columns={"wgt": "weight"}, inplace=True)
+    df_G["src_0"] = df_G["src"] + 1000
+    df_G["dst_0"] = df_G["dst"] + 1000
 
     if directed:
         G = cugraph.Graph(directed=True)
     else:
         G = cugraph.Graph()
-    G.from_cudf_edgelist(df_G, source=['src', 'src_0'],
-                         destination=['dst', 'dst_0'],
-                         edge_attr="weight")
+    G.from_cudf_edgelist(
+        df_G, source=["src", "src_0"], destination=["dst", "dst_0"], edge_attr="weight"
+    )
 
     k = random.randint(1, 10)
 
     seeds = G.select_random_vertices(num_vertices=k)
     vertex_paths, edge_weights, vertex_path_sizes = cugraph.node2vec_random_walks(
-        G, seeds, max_depth)
-    
+        G, seeds, max_depth
+    )
+
     path_data = (vertex_paths, edge_weights, vertex_path_sizes)
-    
+
     check_node2vec_random_walks(G, path_data, seeds, max_depth)
-    
\ No newline at end of file
diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks_mg.py
index ad6b2022e7..a2c2cdc01d 100644
--- a/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks_mg.py
+++ b/python/cugraph/cugraph/tests/sampling/test_node2vec_random_walks_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,10 +19,8 @@
 import cugraph
 import dask_cudf
 import cugraph.dask as dcg
-import cudf
 from cugraph.testing import SMALL_DATASETS
 from cugraph.datasets import karate_asymmetric
-from cugraph.structure.symmetrize import symmetrize
 from pylibcugraph.testing.utils import gen_fixture_params_product
 
 
@@ -206,7 +204,10 @@ def test_dask_mg_node2vec_random_walks(dask_client, input_graph):
     path_data, seeds, max_depth = calc_node2vec_random_walks(input_graph)
     df_G = input_graph.input_df.compute().reset_index(drop=True)
 
-    df_G = input_graph.decompress_to_edgelist(
-        return_unrenumbered_edgelist=True).compute().reset_index(drop=True)
+    df_G = (
+        input_graph.decompress_to_edgelist(return_unrenumbered_edgelist=True)
+        .compute()
+        .reset_index(drop=True)
+    )
 
     check_node2vec_random_walks(input_graph, path_data, seeds, max_depth, df_G)
diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py
index 033e6760e0..41245d4dfe 100644
--- a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py
+++ b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
index ef2fc00af1..2b66098f05 100644
--- a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks.py
@@ -19,10 +19,9 @@
 
 import cudf
 import cugraph
-from cudf.testing import assert_series_equal
 from cudf.testing.testing import assert_frame_equal
 from cugraph.utilities import ensure_cugraph_obj_for_nx
-from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS, utils
+from cugraph.testing import SMALL_DATASETS, DEFAULT_DATASETS
 
 
 # =============================================================================
@@ -74,8 +73,6 @@ def calc_uniform_random_walks(G, max_depth=None):
 
     k = random.randint(1, 6)
 
-    random_walks_type = "uniform"
-
     start_vertices = G.select_random_vertices(num_vertices=k)
 
     print("\nstart_vertices is \n", start_vertices)
@@ -86,8 +83,6 @@ def calc_uniform_random_walks(G, max_depth=None):
     return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices
 
 
-
-
 def check_uniform_random_walks(G, path_data, seeds, max_depth):
     invalid_edge = 0
     invalid_seeds = 0
@@ -118,33 +113,51 @@ def check_uniform_random_walks(G, path_data, seeds, max_depth):
         if i % (max_depth + 1) == 0:
             if isinstance(seeds, cudf.DataFrame):
                 assert_frame_equal(
-                    vertex_1.rename(columns={x:y for x,y in zip(vertex_1.columns,range(0,len(vertex_1.columns)))}),
-                    seeds.iloc[[i // (max_depth + 1)]].reset_index(drop=True).rename(columns={x:y for x,y in zip(seeds.columns,range(0,len(seeds.columns)))}),
-                    check_dtype=False, check_like=True)
+                    vertex_1.rename(
+                        columns={
+                            x: y
+                            for x, y in zip(
+                                vertex_1.columns, range(0, len(vertex_1.columns))
+                            )
+                        }
+                    ),
+                    seeds.iloc[[i // (max_depth + 1)]]
+                    .reset_index(drop=True)
+                    .rename(
+                        columns={
+                            x: y
+                            for x, y in zip(seeds.columns, range(0, len(seeds.columns)))
+                        }
+                    ),
+                    check_dtype=False,
+                    check_like=True,
+                )
             else:
                 if i % (max_depth + 1) == 0 and vertex_1 != seeds[i // (max_depth + 1)]:
                     invalid_seeds += 1
                     print(
                         "[ERR] Invalid seed: "
-                        " src {} != src {}".format(vertex_1, seeds[i // (max_depth + 1)])
-                )
+                        " src {} != src {}".format(
+                            vertex_1, seeds[i // (max_depth + 1)]
+                        )
+                    )
 
         if (i % (max_depth + 1)) != (max_depth):
             # These are the edges
             src = vertex_1
             dst = vertex_2
-            
+
             # check for valid edge.
             if isinstance(seeds, cudf.DataFrame):
-                if (-1 not in src.iloc[0].reset_index(drop=True)) and (-1 not in dst.iloc[0].reset_index(drop=True)):
+                if (-1 not in src.iloc[0].reset_index(drop=True)) and (
+                    -1 not in dst.iloc[0].reset_index(drop=True)
+                ):
                     edge = cudf.DataFrame()
                     edge["src"] = vertex_1["0_vertex_paths"]
                     edge["src_0"] = vertex_1["1_vertex_paths"]
                     edge["dst"] = vertex_2["0_vertex_paths"]
                     edge["dst_0"] = vertex_2["1_vertex_paths"]
 
-                    join1 = cudf.merge(df_G, edge, on=[*edge.columns])
-                    
                     assert len(cudf.merge(df_G, edge, on=[*edge.columns])) > 0
             else:
                 edge = df_G.loc[
@@ -185,7 +198,6 @@ def check_uniform_random_walks(G, path_data, seeds, max_depth):
         assert invalid_edge_wgt == 0
         assert len(e_wgt_paths) == (max_depth) * len(seeds)
 
-    
     max_path_lenth = path_data[2]
     assert max_path_lenth == max_depth
 
@@ -209,9 +221,7 @@ def test_uniform_random_walks(graph_file, directed):
     print("max_depth is ", max_depth)
     input_graph = graph_file.get_graph(create_using=cugraph.Graph(directed=directed))
 
-    path_data, seeds = calc_uniform_random_walks(
-        input_graph, max_depth=max_depth
-    )
+    path_data, seeds = calc_uniform_random_walks(input_graph, max_depth=max_depth)
 
     check_uniform_random_walks(input_graph, path_data, seeds, max_depth)
 
@@ -243,32 +253,28 @@ def test_uniform_random_walks_nx(graph_file):
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
-def test_uniform_random_walks_multi_column_seeds(
-    graph_file,
-    directed
-):
+def test_uniform_random_walks_multi_column_seeds(graph_file, directed):
     max_depth = random.randint(2, 10)
     df_G = graph_file.get_edgelist()
-    df_G.rename(
-        columns={"wgt": "weight"}, inplace=True)
-    df_G['src_0'] = df_G['src'] + 1000
-    df_G['dst_0'] = df_G['dst'] + 1000
+    df_G.rename(columns={"wgt": "weight"}, inplace=True)
+    df_G["src_0"] = df_G["src"] + 1000
+    df_G["dst_0"] = df_G["dst"] + 1000
 
     if directed:
         G = cugraph.Graph(directed=True)
     else:
         G = cugraph.Graph()
-    G.from_cudf_edgelist(df_G, source=['src', 'src_0'],
-                         destination=['dst', 'dst_0'],
-                         edge_attr="weight")
+    G.from_cudf_edgelist(
+        df_G, source=["src", "src_0"], destination=["dst", "dst_0"], edge_attr="weight"
+    )
 
     k = random.randint(1, 10)
 
     seeds = G.select_random_vertices(num_vertices=k)
     vertex_paths, edge_weights, vertex_path_sizes = cugraph.uniform_random_walks(
-        G, seeds, max_depth)
-    
+        G, seeds, max_depth
+    )
+
     path_data = (vertex_paths, edge_weights, vertex_path_sizes)
-    
+
     check_uniform_random_walks(G, path_data, seeds, max_depth)
-    
\ No newline at end of file
diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks_mg.py
index 50aeb0ec84..c574927833 100644
--- a/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks_mg.py
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_random_walks_mg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,10 +19,8 @@
 import cugraph
 import dask_cudf
 import cugraph.dask as dcg
-import cudf
 from cugraph.testing import SMALL_DATASETS
 from cugraph.datasets import karate_asymmetric
-from cugraph.structure.symmetrize import symmetrize
 from pylibcugraph.testing.utils import gen_fixture_params_product
 
 
@@ -206,7 +204,10 @@ def test_dask_mg_uniform_random_walks(dask_client, input_graph):
     path_data, seeds, max_depth = calc_uniform_random_walks(input_graph)
     df_G = input_graph.input_df.compute().reset_index(drop=True)
 
-    df_G = input_graph.decompress_to_edgelist(
-        return_unrenumbered_edgelist=True).compute().reset_index(drop=True)
+    df_G = (
+        input_graph.decompress_to_edgelist(return_unrenumbered_edgelist=True)
+        .compute()
+        .reset_index(drop=True)
+    )
 
     check_uniform_random_walks(input_graph, path_data, seeds, max_depth, df_G)

From f12bded0589d8d415cc3bafe4579c7e7f0beca22 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Thu, 16 Jan 2025 10:44:33 -0800
Subject: [PATCH 58/60] update copyright

---
 python/pylibcugraph/pylibcugraph/node2vec.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pylibcugraph/pylibcugraph/node2vec.pyx b/python/pylibcugraph/pylibcugraph/node2vec.pyx
index 5729dc6e05..322a176b24 100644
--- a/python/pylibcugraph/pylibcugraph/node2vec.pyx
+++ b/python/pylibcugraph/pylibcugraph/node2vec.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at

From b7960b73e9b2f09d437443977d89dd90414e245f Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 17 Jan 2025 20:20:07 -0800
Subject: [PATCH 59/60] update pytest ini for random walks

---
 python/cugraph/pytest.ini | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/cugraph/pytest.ini b/python/cugraph/pytest.ini
index 3aa4cc5680..e1cdd949e7 100644
--- a/python/cugraph/pytest.ini
+++ b/python/cugraph/pytest.ini
@@ -71,3 +71,6 @@ filterwarnings =
           ignore:The behavior of array concatenation with empty entries is deprecated:FutureWarning
           ignore:This method is deprecated and will no longer be supported. The symmetrization:FutureWarning
           ignore:Support for accepting and returning NetworkX objects is deprecated. Please use NetworkX with the nx-cugraph backend:DeprecationWarning
+          ignore:node2vec is deprecated and will be removed in the next release in favor of node2vec_random_walks:FutureWarning
+          ignore:random_walks is deprecated and will be removed in the next release in favor of uniform_random_walks:FutureWarning
+          ignore:Coalesced path results, returned when setting legacy_result_type=True, is deprecated and will no longer be supported:FutureWarning
\ No newline at end of file

From ba015c4968687c2bf160b5b3a6675b282117327c Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke2016@gmail.com>
Date: Fri, 17 Jan 2025 20:22:49 -0800
Subject: [PATCH 60/60] fix style and update copyright

---
 python/cugraph/pytest.ini | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cugraph/pytest.ini b/python/cugraph/pytest.ini
index e1cdd949e7..335a056df4 100644
--- a/python/cugraph/pytest.ini
+++ b/python/cugraph/pytest.ini
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -73,4 +73,4 @@ filterwarnings =
           ignore:Support for accepting and returning NetworkX objects is deprecated. Please use NetworkX with the nx-cugraph backend:DeprecationWarning
           ignore:node2vec is deprecated and will be removed in the next release in favor of node2vec_random_walks:FutureWarning
           ignore:random_walks is deprecated and will be removed in the next release in favor of uniform_random_walks:FutureWarning
-          ignore:Coalesced path results, returned when setting legacy_result_type=True, is deprecated and will no longer be supported:FutureWarning
\ No newline at end of file
+          ignore:Coalesced path results, returned when setting legacy_result_type=True, is deprecated and will no longer be supported:FutureWarning