Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gpuCI] Forward-merge branch-0.19 to branch-0.20 [skip ci] #1522

Merged
merged 1 commit into from
Apr 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ As of Release 0.18 - including 0.18 nightly
| | Breadth First Search (BFS) | Multi-GPU | with cutoff support <br/> [C++ README](cpp/src/traversal/README.md#BFS) |
| | Single Source Shortest Path (SSSP) | Multi-GPU | [C++ README](cpp/src/traversal/README.md#SSSP) |
| | Traveling Salesperson Problem (TSP) | Single-GPU | |
| Sampling | Random Walks (RW) | Single-GPU | |
| Structure | | | |
| | Renumbering | Single-GPU | multiple columns, any data type |
| | Symmetrize | Multi-GPU | |
Expand Down
11 changes: 11 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,17 @@ Overlap Coefficient
:undoc-members:


Sampling
========

Random Walks
------------

.. automodule:: cugraph.sampling.random_walks
    :members:
    :undoc-members:


Traversal
=========

Expand Down
2 changes: 2 additions & 0 deletions python/cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@
from cugraph.raft import raft_include_test
from cugraph.comms import comms

from cugraph.sampling import random_walks

# Versioneer
from ._version import get_versions

Expand Down
14 changes: 14 additions & 0 deletions python/cugraph/sampling/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.sampling.random_walks import random_walks
22 changes: 22 additions & 0 deletions python/cugraph/sampling/random_walks.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#from cugraph.structure.graph_primtypes cimport *
from cugraph.structure.graph_utilities cimport *

# Cython declaration of the C++ random-walks entry point exposed by
# "utilities/cython.hpp" in the cugraph::cython namespace.
cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
    # Templated on the vertex and edge index types; the result is handed back
    # as a unique_ptr to a random_walk_ret_t.
    #
    # handle      : handle_t passed by const reference.
    # g           : graph_container_t describing the graph to walk.
    # ptr_d_start : pointer to the start vertices, typed as vertex_t
    #               (presumably device memory -- confirm against cython.hpp).
    # num_paths   : number of walks to run.
    # max_depth   : maximum depth of each walk.
    #
    # `except +` makes Cython translate a C++ exception raised by the call
    # into a Python exception instead of letting it escape.
    cdef unique_ptr[random_walk_ret_t] call_random_walks[vertex_t, edge_t](
        const handle_t &handle,
        const graph_container_t &g,
        const vertex_t *ptr_d_start,
        edge_t num_paths,
        edge_t max_depth) except +
95 changes: 95 additions & 0 deletions python/cugraph/sampling/random_walks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import cudf
from cugraph.sampling import random_walks_wrapper
import cugraph
from collections import defaultdict

# FIXME might be more efficient to return either (df + offset) or 3 cudf.Series


def random_walks(
    G,
    start_vertices,
    max_depth=None
):
    """
    Compute a random walk for each node in 'start_vertices'.

    Parameters
    ----------
    G : cuGraph.Graph or networkx.Graph
        The graph can be either directed (DiGraph) or undirected (Graph).
        The input is passed through ``cugraph.utilities.check_nx_graph``
        before the walks are run.
        A dedicated weight parameter is currently not supported.

    start_vertices : int or list or cudf.Series
        A single node or a list or a cudf.Series of nodes from which to run
        the random walks. One walk is started per entry.

    max_depth : int
        The maximum depth of the random walks. This argument is required;
        the ``None`` default only exists so that omitting it can be reported.

    Returns
    -------
    random_walks_edge_lists : cudf.DataFrame
        GPU data frame with the columns 'src', 'dst' and 'weight' holding
        the source identifiers, destination identifiers and edge weights of
        every edge of every walk, one walk after the other.

    seeds_offsets : cudf.Series
        Series containing the starting offset in the returned edge list
        for each vertex in start_vertices, followed by one final entry equal
        to the total number of edges.

    Raises
    ------
    TypeError
        If 'max_depth' is not specified.
    """
    if max_depth is None:
        raise TypeError("must specify a 'max_depth'")

    G, _ = cugraph.utilities.check_nx_graph(G)

    # `start_vertices is int` compared the value with the type object itself
    # and was therefore never true; isinstance is the intended check.
    if isinstance(start_vertices, int):
        start_vertices = [start_vertices]

    if not isinstance(start_vertices, cudf.Series):
        start_vertices = cudf.Series(start_vertices)

    # Same truthiness test as for the un-renumbering step below, so that both
    # directions of the id translation are always applied together.
    if G.renumbered:
        start_vertices = G.lookup_internal_vertex_id(start_vertices)

    vertex_set, edge_set, sizes = random_walks_wrapper.random_walks(
        G, start_vertices, max_depth)

    if G.renumbered:
        df_ = cudf.DataFrame()
        df_['vertex_set'] = vertex_set
        df_ = G.unrenumber(df_, 'vertex_set', preserve_order=True)
        vertex_set = cudf.Series(df_['vertex_set'])

    # Convert to host arrays once; doing it per element inside the loop
    # repeats the whole conversion for every edge.
    host_vertices = vertex_set.values_host
    host_sizes = sizes.values_host

    src = []
    dst = []
    offsets = [0]
    path_start = 0
    for size in host_sizes:
        path_end = path_start + size
        # A path made of `size` vertices contributes `size - 1` edges: each
        # vertex but the last is a source, each vertex but the first a
        # destination. Paths with fewer than two vertices contribute none.
        if size > 1:
            src.extend(host_vertices[path_start:path_end - 1])
            dst.extend(host_vertices[path_start + 1:path_end])
        path_start = path_end
        # Running edge count == offset at which the next path starts.
        offsets.append(len(src))

    # Build the frame in one go rather than appending once per path.
    df = cudf.DataFrame({'src': src, 'dst': dst})
    df['weight'] = edge_set
    offsets = cudf.Series(offsets)

    return df, offsets
116 changes: 116 additions & 0 deletions python/cugraph/sampling/random_walks_wrapper.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from cugraph.sampling.random_walks cimport call_random_walks
#from cugraph.structure.graph_primtypes cimport *
from cugraph.structure.graph_utilities cimport *
from libcpp cimport bool
from libcpp.utility cimport move
from libc.stdint cimport uintptr_t
from cugraph.structure import graph_primtypes_wrapper
import cudf
import rmm
import numpy as np
import numpy.ctypeslib as ctypeslib
from rmm._lib.device_buffer cimport DeviceBuffer
from cudf.core.buffer import Buffer
from cython.operator cimport dereference as deref
def random_walks(input_graph, start_vertices, max_depth):
    """
    Call the C++ random_walks implementation on 'input_graph'.

    Parameters
    ----------
    input_graph : graph object
        Must expose ``edgelist.edgelist_df`` with 'src' and 'dst' columns
        (and 'weights' when ``edgelist.weights`` is set), as well as
        ``number_of_vertices()`` and ``number_of_edges()``.

    start_vertices : cudf.Series
        Vertices from which the walks start; one walk is run per entry.

    max_depth : int
        Maximum depth of each walk.

    Returns
    -------
    (set_vertex, set_edge, set_sizes) : tuple of cudf.Series
        Series wrapping the ``d_coalesced_v_``, ``d_coalesced_w_`` and
        ``d_sizes_`` buffers of the C++ result, typed with the vertex,
        weight and edge dtypes respectively.
    """
    # FIXME: Offsets and indices are currently hardcoded to int, but this may
    #        not be acceptable in the future.
    numberTypeMap = {np.dtype("int32") : <int>numberTypeEnum.int32Type,
                     np.dtype("int64") : <int>numberTypeEnum.int64Type,
                     np.dtype("float32") : <int>numberTypeEnum.floatType,
                     np.dtype("double") : <int>numberTypeEnum.doubleType}
    [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']]
    vertex_t = src.dtype
    edge_t = np.dtype("int32")
    weights = None
    if input_graph.edgelist.weights:
        weights = input_graph.edgelist.edgelist_df['weights']
    num_verts = input_graph.number_of_vertices()
    num_edges = input_graph.number_of_edges(directed_edges=True)
    num_partition_edges = num_edges

    # Edge counts that do not fit in a signed 32-bit integer need 64-bit
    # edge indices.
    if num_edges > (2**31 - 1):
        edge_t = np.dtype("int64")
    cdef unique_ptr[random_walk_ret_t] rw_ret_ptr

    cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0]
    cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0]
    cdef uintptr_t c_edge_weights = <uintptr_t>NULL
    if weights is not None:
        c_edge_weights = weights.__cuda_array_interface__['data'][0]
        weight_t = weights.dtype
        is_weighted = True
    else:
        weight_t = np.dtype("float32")
        is_weighted = False
    # Pointers for random_walks
    # The start vertices must have the same dtype as the graph's vertices:
    # the buffer is reinterpreted below as int* or long* depending on
    # vertex_t, so an unconditional int32 cast would be misread in the
    # 64-bit branch.
    start_vertices = start_vertices.astype(vertex_t)
    cdef uintptr_t c_start_vertex_ptr = start_vertices.__cuda_array_interface__['data'][0]
    num_paths = start_vertices.size
    cdef unique_ptr[handle_t] handle_ptr
    handle_ptr.reset(new handle_t())
    handle_ = handle_ptr.get()
    cdef graph_container_t graph_container
    populate_graph_container(graph_container,
                             handle_[0],
                             <void*>c_src_vertices, <void*>c_dst_vertices, <void*>c_edge_weights,
                             <void*>NULL,
                             <numberTypeEnum>(<int>(numberTypeMap[vertex_t])),
                             <numberTypeEnum>(<int>(numberTypeMap[edge_t])),
                             <numberTypeEnum>(<int>(numberTypeMap[weight_t])),
                             num_partition_edges,
                             num_verts,
                             num_edges,
                             False,
                             is_weighted,
                             False, False)
    # Dispatch on the (vertex_t, edge_t) combination.
    # NOTE(review): the final branch assumes that a non-int32 vertex type
    # means both vertex_t and edge_t are int64 -- confirm this matches the
    # edge_t registered in graph_container when num_edges fits in 32 bits.
    if(vertex_t == np.dtype("int32")):
        if(edge_t == np.dtype("int32")):
            rw_ret_ptr = move(call_random_walks[int, int]( deref(handle_),
                                                           graph_container,
                                                           <int*> c_start_vertex_ptr,
                                                           <int> num_paths,
                                                           <int> max_depth))
        else: # (edge_t == np.dtype("int64")):
            rw_ret_ptr = move(call_random_walks[int, long]( deref(handle_),
                                                            graph_container,
                                                            <int*> c_start_vertex_ptr,
                                                            <long> num_paths,
                                                            <long> max_depth))
    else: # (vertex_t == edge_t == np.dtype("int64")):
        rw_ret_ptr = move(call_random_walks[long, long]( deref(handle_),
                                                         graph_container,
                                                         <long*> c_start_vertex_ptr,
                                                         <long> num_paths,
                                                         <long> max_depth))


    # Take ownership of the three result buffers and expose them as cudf
    # Series without copying them through the host.
    rw_ret= move(rw_ret_ptr.get()[0])
    vertex_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_v_))
    edge_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_w_))
    sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_))
    vertex_set = Buffer(vertex_set)
    edge_set = Buffer(edge_set)
    sizes = Buffer(sizes)

    set_vertex = cudf.Series(data=vertex_set, dtype=vertex_t)
    set_edge = cudf.Series(data=edge_set, dtype=weight_t)
    set_sizes = cudf.Series(data=sizes, dtype=edge_t)

    return set_vertex, set_edge, set_sizes

9 changes: 9 additions & 0 deletions python/cugraph/structure/graph_utilities.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,15 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
unique_ptr[device_buffer] dst_indices
unique_ptr[device_buffer] edge_data
unique_ptr[device_buffer] subgraph_offsets

# Cython view of the C++ result object produced by the random-walks call.
# Only the members listed here are accessible from Cython code.
cdef cppclass random_walk_ret_t:
    # Size fields; judging by the names these are the lengths of the two
    # coalesced buffers -- TODO confirm against utilities/cython.hpp.
    size_t coalesced_sz_v_
    size_t coalesced_sz_w_
    # Presumably echo the number of paths / maximum depth of the request
    # -- TODO confirm.
    size_t num_paths_
    size_t max_depth_
    # Owning pointers to the result buffers: vertices of all paths, the
    # matching edge weights, and the per-path sizes (naming-based reading;
    # verify against the C++ definition).
    unique_ptr[device_buffer] d_coalesced_v_
    unique_ptr[device_buffer] d_coalesced_w_
    unique_ptr[device_buffer] d_sizes_

cdef extern from "<utility>" namespace "std" nogil:
cdef device_buffer move(device_buffer)
Expand Down
Loading