Bipartite R-mat graph generation. (#3512)
Addresses #2075

This function will generate (source, destination) vertex ID pairs. Source vertex IDs will have values in `[0, 2^src_scale)` and destination vertex IDs will have values in `[0, 2^dst_scale)`.
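
A minimal usage sketch (illustrative only, not part of this commit; it assumes an initialized `raft::handle_t` and follows the signature declared in `cpp/include/cugraph/graph_generators.hpp` below):

```cpp
#include <cugraph/graph_generators.hpp>

#include <raft/core/handle.hpp>
#include <raft/random/rng_state.hpp>

int main()
{
  raft::handle_t handle;
  raft::random::RngState rng_state(0 /* seed */);

  // 2^20 edges between 2^16 source vertices and 2^12 destination vertices,
  // using the Graph 500 default a = 0.57, b = 0.19, c = 0.19 (d = 0.05).
  auto [srcs, dsts] = cugraph::generate_bipartite_rmat_edgelist<int64_t>(
    handle, rng_state, 16 /* src_scale */, 12 /* dst_scale */, size_t{1} << 20);
  // srcs[i] is in [0, 2^16) and dsts[i] is in [0, 2^12); multi-edges may occur.

  return 0;
}
```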

Additionally,

* The `scramble_vertex_ids` function had unused input parameters and internally derived the scale from the edge count, which was erroneous. Fixed this bug.

* `Rmat_Usecase` was ignoring the `scramble_vertex_ids` flag. Fixed this bug.

* Added a `scramble_vertex_ids` overload that takes just a single vertex list (instead of a src, dst pair).

* Updated `scramble_vertex_ids` to take its input vectors as r-values and return the scrambled vectors (instead of taking in/out parameters); see the sketch after this list.
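
For example, continuing the sketch above (so `srcs` and `dsts` hold the bipartite edge list just generated), the new value-based API looks like this:

```cpp
// Scramble each side separately with the new single-list overload; the source
// IDs are in [0, 2^16) while the destination IDs are in [0, 2^12).
srcs = cugraph::scramble_vertex_ids(handle, std::move(srcs), 16 /* lgN */);
dsts = cugraph::scramble_vertex_ids(handle, std::move(dsts), 12 /* lgN */);

// For a square R-mat edge list (both sides in [0, 2^scale)), the pair overload
// scrambles sources and destinations in one call:
//   std::tie(srcs, dsts) =
//     cugraph::scramble_vertex_ids(handle, std::move(srcs), std::move(dsts), scale);
```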

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Naim (https://github.com/naimnv)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Brad Rees (https://github.com/BradReesWork)

URL: #3512
seunghwak authored May 1, 2023
1 parent 5d739af commit e271bad
Showing 10 changed files with 771 additions and 135 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -231,6 +231,7 @@ set(CUGRAPH_SOURCES
src/components/legacy/connectivity.cu
src/centrality/legacy/betweenness_centrality.cu
src/generators/generate_rmat_edgelist.cu
src/generators/generate_bipartite_rmat_edgelist.cu
src/generators/generator_tools.cu
src/generators/simple_generators.cu
src/generators/erdos_renyi_generator.cu
81 changes: 70 additions & 11 deletions cpp/include/cugraph/graph_generators.hpp
@@ -127,6 +127,45 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> generat
double c = 0.19,
bool clip_and_flip = false);

/**
* @brief generate an edge list for a bipartite R-mat graph.
*
* The source vertex IDs will be in the range of [0, 2^src_scale) and the destination vertex IDs
* will be in the range of [0, 2^dst_scale). This function allows multi-edges.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param rng_state RAFT RNG state, updated with each call.
* @param src_scale Scale factor to set the range of source vertex IDs (or the first vertex set) in
* the bipartite graph. Vertex IDs have values in [0, V_src), where V_src = 1 << @p src_scale.
* @param dst_scale Scale factor to set the range of destination vertex IDs (or the second vertex
* set) in the bipartite graph. Vertex IDs have values in [0, V_dst), where V_dst = 1 << @p
* dst_scale.
* @param num_edges Number of edges to generate.
* @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org
* for additional details). a, b, c, d should be non-negative and a + b + c should be no larger
* than 1.0.
* @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org
* for additional details). a, b, c, d should be non-negative and a + b + c should be no larger
* than 1.0.
* @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org
* for additional details). a, b, c, d should be non-negative and a + b + c should be no larger
* than 1.0.
* @return std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> A tuple of
* rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs.
*/
template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>
generate_bipartite_rmat_edgelist(raft::handle_t const& handle,
raft::random::RngState& rng_state,
size_t src_scale,
size_t dst_scale,
size_t num_edges,
double a = 0.57,
double b = 0.19,
double c = 0.19);

enum class generator_distribution_t { POWER_LAW = 0, UNIFORM };

/**
@@ -408,29 +447,49 @@ symmetrize_edgelist_from_triangular(
std::optional<rmm::device_uvector<weight_t>>&& optional_d_weights_v,
bool check_diagonal = false);

/**
* @brief scramble vertex IDs in a graph
*
* Given a vertex list for a graph, scramble the input vertex IDs.
*
* The scramble code here follows the algorithm in the Graph 500 reference
* implementation version 3.0.0.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param vertices Vector of input vertices
* @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN).
* @return rmm::device_uvector object storing scrambled vertex IDs.
*/
template <typename vertex_t>
rmm::device_uvector<vertex_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& vertices,
size_t lgN);
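
// Example (illustrative only, not part of this header): scramble the two sides
// of a bipartite edge list separately, as the source and destination ID ranges
// may differ:
//   srcs = scramble_vertex_ids(handle, std::move(srcs), src_scale);
//   dsts = scramble_vertex_ids(handle, std::move(dsts), dst_scale);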

/**
* @brief scramble vertex IDs in a graph
*
* Given an edgelist for a graph, scramble all vertex ids by the given offset.
* This translation is done in place.
* Given an edge list for a graph, scramble the input vertex IDs.
*
* The scramble code here follows the algorithm in the Graph 500 reference
* implementation version 3.0.0.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param d_src_v Vector of source vertices
* @param d_dst_v Vector of destination vertices
* @param vertex_id_offset Offset to add to each vertex id
* @param seed Used to initialize random number generator
* @param d_src_v Vector of input source vertices
* @param d_dst_v Vector of input destination vertices
* @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN).
* @return Tuple of two rmm::device_uvector objects storing scrambled source & destination vertex
* IDs, respectively.
*/
template <typename vertex_t>
void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>& d_src_v,
rmm::device_uvector<vertex_t>& d_dst_v,
vertex_t vertex_id_offset,
uint64_t seed = 0);
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& srcs,
rmm::device_uvector<vertex_t>&& dsts,
size_t lgN);
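
// Example (illustrative only, not part of this header): scramble a square R-mat
// edge list whose vertex IDs fall in [0, 2^scale):
//   std::tie(srcs, dsts) =
//     scramble_vertex_ids(handle, std::move(srcs), std::move(dsts), scale);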

/**
* @brief Combine edgelists from multiple sources into a single edgelist
141 changes: 141 additions & 0 deletions cpp/src/generators/generate_bipartite_rmat_edgelist.cu
@@ -0,0 +1,141 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cugraph/detail/utility_wrappers.hpp>
#include <cugraph/graph_generators.hpp>
#include <cugraph/utilities/error.hpp>

#include <raft/core/handle.hpp>
#include <raft/random/rng.cuh>

#include <rmm/device_uvector.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <rmm/detail/error.hpp>
#include <tuple>

namespace cugraph {

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>
generate_bipartite_rmat_edgelist(raft::handle_t const& handle,
raft::random::RngState& rng_state,
size_t src_scale,
size_t dst_scale,
size_t num_edges,
double a,
double b,
double c)
{
CUGRAPH_EXPECTS(
(size_t{1} << src_scale) <= static_cast<size_t>(std::numeric_limits<vertex_t>::max()),
"Invalid input argument: src_scale too large for vertex_t.");
CUGRAPH_EXPECTS(
(size_t{1} << dst_scale) <= static_cast<size_t>(std::numeric_limits<vertex_t>::max()),
"Invalid input argument: dst_scale too large for vertex_t.");
CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0),
"Invalid input argument: a, b, c should be non-negative and a + b + c should not "
"be larger than 1.0.");

// to limit memory footprint (1024 is a tuning parameter)
auto max_edges_to_generate_per_iteration =
static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * 1024;
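  // each edge consumes one uniform random draw per source bit and one per
  // destination bit, so a chunk of edges needs (src_scale + dst_scale) draws per edge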
rmm::device_uvector<float> rands(
std::min(num_edges, max_edges_to_generate_per_iteration) * (src_scale + dst_scale),
handle.get_stream());

rmm::device_uvector<vertex_t> srcs(num_edges, handle.get_stream());
rmm::device_uvector<vertex_t> dsts(num_edges, handle.get_stream());

size_t num_edges_generated{0};
while (num_edges_generated < num_edges) {
auto num_edges_to_generate =
std::min(num_edges - num_edges_generated, max_edges_to_generate_per_iteration);
auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())) +
num_edges_generated;

detail::uniform_random_fill(handle.get_stream(),
rands.data(),
num_edges_to_generate * (src_scale + dst_scale),
0.0f,
1.0f,
rng_state);

thrust::transform(
handle.get_thrust_policy(),
thrust::make_counting_iterator(size_t{0}),
thrust::make_counting_iterator(num_edges_to_generate),
pair_first,
// if a + b == 0.0, a_norm is irrelevant, if (1.0 - (a+b)) == 0.0, c_norm is irrelevant
[src_scale,
dst_scale,
rands = rands.data(),
a_plus_b = a + b,
a_plus_c = a + c,
a_norm = (a + b) > 0.0 ? a / (a + b) : 0.0,
c_norm = (1.0 - (a + b)) > 0.0 ? c / (1.0 - (a + b)) : 0.0] __device__(auto i) {
vertex_t src{0};
vertex_t dst{0};
size_t rand_offset = i * (src_scale + dst_scale);
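          // One recursion level per bit: the src draw picks the upper (probability
          // a + b) or lower (c + d) half of the adjacency matrix and selects the
          // conditional threshold (a_norm or c_norm) for the dst draw; when only
          // dst bits remain (dst_scale > src_scale), the marginal threshold a + c
          // is used instead.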
for (int level = 0; level < static_cast<int>(std::max(src_scale, dst_scale)); ++level) {
auto dst_threshold = a_plus_c;
if (level < src_scale) {
auto r = rands[rand_offset++];
auto src_bit_set = r > a_plus_b;
src +=
src_bit_set ? static_cast<vertex_t>(vertex_t{1} << (src_scale - (level + 1))) : 0;
dst_threshold = src_bit_set ? c_norm : a_norm;
}
if (level < dst_scale) {
auto r = rands[rand_offset++];
auto dst_bit_set = r > dst_threshold;
dst +=
dst_bit_set ? static_cast<vertex_t>(vertex_t{1} << (dst_scale - (level + 1))) : 0;
}
}
return thrust::make_tuple(src, dst);
});
num_edges_generated += num_edges_to_generate;
}

return std::make_tuple(std::move(srcs), std::move(dsts));
}

template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
generate_bipartite_rmat_edgelist<int32_t>(raft::handle_t const& handle,
raft::random::RngState& rng_state,
size_t src_scale,
size_t dst_scale,
size_t num_edges,
double a,
double b,
double c);

template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>
generate_bipartite_rmat_edgelist<int64_t>(raft::handle_t const& handle,
raft::random::RngState& rng_state,
size_t src_scale,
size_t dst_scale,
size_t num_edges,
double a,
double b,
double c);

} // namespace cugraph
83 changes: 49 additions & 34 deletions cpp/src/generators/generator_tools.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -43,10 +43,10 @@ template <typename T>
rmm::device_uvector<T> append_all(raft::handle_t const& handle,
std::vector<rmm::device_uvector<T>>&& input)
{
size_t size{0};
// for (size_t i = 0; i < input.size(); ++i) size += input[i].size();
for (auto& element : input)
size += element.size();
auto size = std::transform_reduce(
input.begin(), input.end(), size_t{0}, std::plus<size_t>{}, [](auto const& element) {
return element.size();
});

rmm::device_uvector<T> output(size, handle.get_stream());
auto output_iter = output.begin();
@@ -56,36 +56,43 @@ rmm::device_uvector<T> append_all(raft::handle_t const& handle,
output_iter += element.size();
}

/*
for (size_t i = 0; i < input.size(); ++i) {
raft::copy(output_iter, input[i].begin(), input[i].size(), handle.get_stream());
output_iter += input[i].size();
}
*/

return output;
}

} // namespace detail

template <typename vertex_t>
void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>& d_src_v,
rmm::device_uvector<vertex_t>& d_dst_v,
vertex_t vertex_id_offset,
uint64_t seed)
rmm::device_uvector<vertex_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& vertices,
size_t lgN)
{
vertex_t scale = 1 + raft::log2(d_src_v.size());
thrust::transform(handle.get_thrust_policy(),
vertices.begin(),
vertices.end(),
vertices.begin(),
[lgN] __device__(auto v) { return detail::scramble(v, lgN); });

auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin()));
return std::move(vertices);
}

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& srcs,
rmm::device_uvector<vertex_t>&& dsts,
size_t lgN)
{
auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin()));
thrust::transform(handle.get_thrust_policy(),
pair_first,
pair_first + d_src_v.size(),
pair_first + srcs.size(),
pair_first,
[scale] __device__(auto pair) {
return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale),
detail::scramble(thrust::get<1>(pair), scale));
[lgN] __device__(auto pair) {
return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), lgN),
detail::scramble(thrust::get<1>(pair), lgN));
});

return std::make_tuple(std::move(srcs), std::move(dsts));
}

template <typename vertex_t, typename weight_t>
@@ -250,17 +257,25 @@ symmetrize_edgelist_from_triangular(
optional_d_weights_v ? std::move(optional_d_weights_v) : std::nullopt);
}

template void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int32_t>& d_src_v,
rmm::device_uvector<int32_t>& d_dst_v,
int32_t vertex_id_offset,
uint64_t seed);

template void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int64_t>& d_src_v,
rmm::device_uvector<int64_t>& d_dst_v,
int64_t vertex_id_offset,
uint64_t seed);
template rmm::device_uvector<int32_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& vertices,
size_t lgN);

template rmm::device_uvector<int64_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& vertices,
size_t lgN);

template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& srcs,
rmm::device_uvector<int32_t>&& dsts,
size_t lgN);

template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& srcs,
rmm::device_uvector<int64_t>&& dsts,
size_t lgN);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,