Skip to content

Commit

Permalink
Multi gpu sample edges utilities (#2064)
Browse files Browse the repository at this point in the history
Add utilities to enable multi-GPU gathering, for use by MNMG (multi-node, multi-GPU) sampling.

Authors:
  - Kumar Aatish (https://github.com/kaatish)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: #2064
  • Loading branch information
kaatish authored Feb 16, 2022
1 parent 41c3b70 commit f6a92fc
Show file tree
Hide file tree
Showing 9 changed files with 1,643 additions and 144 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ add_library(cugraph SHARED
src/community/legacy/extract_subgraph_by_vertex.cu
src/community/legacy/egonet.cu
src/sampling/random_walks.cu
src/sampling/detail/gather_utils_impl.cu
src/cores/legacy/core_number.cu
src/cores/core_number_sg.cu
src/cores/core_number_mg.cu
Expand Down
192 changes: 192 additions & 0 deletions cpp/include/cugraph/detail/graph_functions.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cugraph/matrix_partition_device_view.cuh>
#include <cugraph/partition_manager.hpp>
#include <cugraph/utilities/device_comm.cuh>
#include <cugraph/utilities/host_scalar_comm.cuh>

#include <raft/handle.hpp>

#include <thrust/binary_search.h>
#include <thrust/distance.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/remove.h>
#include <thrust/sort.h>
#include <thrust/tabulate.h>

#include <rmm/device_uvector.hpp>

#include <numeric>
#include <vector>

namespace cugraph {

namespace detail {

/**
 * @brief Compute local out degrees of the sources belonging to the adjacency matrices
 * stored on each gpu
 *
 * Iterate through partitions and store their local degrees
 *
 * @tparam GraphViewType Type of the passed non-owning graph object.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param graph_view Non-owning graph object.
 * @return A single device vector containing the local out degrees of the sources belonging to the
 * adjacency matrices stored on this gpu
 */
template <typename GraphViewType>
rmm::device_uvector<typename GraphViewType::edge_type> compute_local_major_degrees(
raft::handle_t const& handle, GraphViewType const& graph_view);

/**
 * @brief Calculate global degree information for all vertices represented by the current gpu
 *
 * Calculate the local degrees and then perform a row-wise exclusive scan over all gpus in the
 * column communicator, so each gpu learns how many edges of each source precede it.
 *
 * @tparam GraphViewType Type of the passed non-owning graph object.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param graph_view Non-owning graph object.
 * @return Tuple of two device vectors. The first one contains the per-source edge count
 * encountered by gpus in the column communicator before the current gpu. The second device vector
 * contains the global out degree for every source represented by the current gpu
 */
template <typename GraphViewType>
std::tuple<rmm::device_uvector<typename GraphViewType::edge_type>,
rmm::device_uvector<typename GraphViewType::edge_type>>
get_global_degree_information(raft::handle_t const& handle, GraphViewType const& graph_view);

/**
 * @brief Gather active sources and associated client gpu ids across gpus in a
 * column communicator
 *
 * Collect all the vertex ids and client gpu ids to be processed by every gpu in
 * the column communicator and call sort on the list.
 *
 * @tparam GraphViewType Type of the passed non-owning graph object.
 * @tparam VertexIterator Type of the iterator for vertex identifiers.
 * @tparam GPUIdIterator Type of the iterator for gpu id identifiers.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param graph_view Non-owning graph object.
 * @param vertex_input_first Iterator pointing to the first vertex id to be processed
 * @param vertex_input_last Iterator pointing to the last (exclusive) vertex id to be processed
 * @param gpu_id_first Iterator pointing to the first gpu id to be processed
 * @return Tuple of two device vectors: all the vertices that are to be processed by every gpu
 * in the column communicator, and the client gpu id associated with each of those vertices
 */
template <typename GraphViewType, typename VertexIterator, typename GPUIdIterator>
std::tuple<rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename std::iterator_traits<GPUIdIterator>::value_type>>
gather_active_sources_in_row(raft::handle_t const& handle,
GraphViewType const& graph_view,
VertexIterator vertex_input_first,
VertexIterator vertex_input_last,
GPUIdIterator gpu_id_first);

/**
 * @brief Return global out degrees of active sources
 *
 * Get partition information of all graph partitions on the gpu and select
 * the global degrees of all active sources
 *
 * @tparam GraphViewType Type of the passed non-owning graph object.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param graph_view Non-owning graph object.
 * @param active_majors Device vector containing all the vertex ids that are processed by
 * gpus in the column communicator
 * @param global_out_degrees Global out degrees for every source represented by the current gpu
 * @return Global out degrees of all sources in active_majors, in the same order as active_majors
 */
template <typename GraphViewType>
rmm::device_uvector<typename GraphViewType::edge_type> get_active_major_global_degrees(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_out_degrees);

/**
 * @brief Return partition information of all vertex ids of all the partitions belonging to a gpu
 *
 * Iterate through partitions and store the starting vertex ids, exclusive scan of vertex counts,
 * offsets and indices of the partitions csr structure
 *
 * @tparam GraphViewType Type of the passed non-owning graph object.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param graph_view Non-owning graph object.
 * @return Tuple of device vectors. The first vector contains all the partitions related to the
 * gpu. The second and third vectors contain starting and ending vertex ids of all the partitions
 * belonging to the gpu. The fourth vector contains the starting vertex id of the hypersparse
 * region in each partition. The fifth vector denotes the vertex count offset (how many vertices
 * are dealt with by the previous partitions).
 */
template <typename GraphViewType>
std::tuple<rmm::device_uvector<matrix_partition_device_view_t<typename GraphViewType::vertex_type,
typename GraphViewType::edge_type,
typename GraphViewType::weight_type,
GraphViewType::is_multi_gpu>>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>>
partition_information(raft::handle_t const& handle, GraphViewType const& graph_view);

/**
 * @brief Gather valid edges present on the current gpu
 *
 * Collect all the edges that are present in the adjacency lists on the current gpu
 *
 * @tparam GraphViewType Type of the passed non-owning graph object.
 * @tparam EdgeIndexIterator Type of the iterator for edge indices.
 * @tparam gpu_t Type used to store gpu id identifiers.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param graph_view Non-owning graph object.
 * @param active_majors_in_row Device vector containing all the vertex ids that are processed by
 * gpus in the column communicator
 * @param active_major_gpu_ids Device vector containing the gpu id associated with every vertex
 * present in active_majors_in_row
 * @param edge_index_first Iterator pointing to the first destination index
 * @param indices_per_major Number of indices supplied for every source in active_majors_in_row
 * @param global_degree_offsets Global degree offset to the local adjacency list for every source
 * represented by the current gpu
 * @return A tuple of device vectors containing the majors, minors and gpu_ids gathered locally
 */
template <typename GraphViewType, typename EdgeIndexIterator, typename gpu_t>
std::tuple<rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<gpu_t>>
gather_local_edges(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors_in_row,
const rmm::device_uvector<gpu_t>& active_major_gpu_ids,
EdgeIndexIterator edge_index_first,
typename GraphViewType::edge_type indices_per_major,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_degree_offsets);

} // namespace detail

} // namespace cugraph
15 changes: 14 additions & 1 deletion cpp/include/cugraph/graph_view.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cugraph/matrix_partition_view.hpp>
Expand Down Expand Up @@ -632,6 +633,12 @@ class graph_view_t<vertex_t,
return local_sorted_unique_edge_col_offsets_;
}

// Convert the (compressed) graph view back into an edge list of (src, dst, optional weight)
// device vectors. NOTE(review): presumably renumber_map, when provided, maps internal vertex
// ids back to external ids in the output — confirm against the .cu implementation.
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
std::optional<rmm::device_uvector<weight_t>>>
decompress_to_edgelist(raft::handle_t const& handle,
std::optional<rmm::device_uvector<vertex_t>> const& renumber_map) const;

private:
std::vector<edge_t const*> adj_matrix_partition_offsets_{};
std::vector<vertex_t const*> adj_matrix_partition_indices_{};
Expand Down Expand Up @@ -859,6 +866,12 @@ class graph_view_t<vertex_t,
return std::nullopt;
}

// Convert the (compressed) graph view back into an edge list of (src, dst, optional weight)
// device vectors. NOTE(review): presumably renumber_map, when provided, maps internal vertex
// ids back to external ids in the output — confirm against the .cu implementation.
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
std::optional<rmm::device_uvector<weight_t>>>
decompress_to_edgelist(raft::handle_t const& handle,
std::optional<rmm::device_uvector<vertex_t>> const& renumber_map) const;

private:
edge_t const* offsets_{nullptr};
vertex_t const* indices_{nullptr};
Expand Down
Loading

0 comments on commit f6a92fc

Please sign in to comment.