Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bipartite R-mat graph generation. #3512

Merged
merged 18 commits into from
May 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ set(CUGRAPH_SOURCES
src/components/legacy/connectivity.cu
src/centrality/legacy/betweenness_centrality.cu
src/generators/generate_rmat_edgelist.cu
src/generators/generate_bipartite_rmat_edgelist.cu
src/generators/generator_tools.cu
src/generators/simple_generators.cu
src/generators/erdos_renyi_generator.cu
Expand Down
81 changes: 70 additions & 11 deletions cpp/include/cugraph/graph_generators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,45 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> generat
double c = 0.19,
bool clip_and_flip = false);

/**
 * @brief generate an edge list for a bipartite R-mat graph.
 *
 * The source vertex IDs will be in the range of [0, 2^src_scale) and the destination vertex IDs
 * will be in the range of [0, 2^dst_scale). This function allows multi-edges.
 *
 * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param rng_state RAFT RNG state, updated with each call
 * @param src_scale Scale factor to set the range of source vertex IDs (or the first vertex set) in
 * the bipartite graph. Vertex IDs have values in [0, V_src), where V_src = 1 << @p src_scale.
 * @param dst_scale Scale factor to set the range of destination vertex IDs (or the second vertex
 * set) in the bipartite graph. Vertex IDs have values in [0, V_dst), where V_dst = 1 << @p
 * dst_scale.
 * @param num_edges Number of edges to generate.
 * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit
 * https://graph500.org for additional details). a, b, c, d should be non-negative and a + b + c
 * should be no larger than 1.0.
 * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit
 * https://graph500.org for additional details). a, b, c, d should be non-negative and a + b + c
 * should be no larger than 1.0.
 * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit
 * https://graph500.org for additional details). a, b, c, d should be non-negative and a + b + c
 * should be no larger than 1.0.
 * @return std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> A tuple of
 * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs.
 */
template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>
generate_bipartite_rmat_edgelist(raft::handle_t const& handle,
                                 raft::random::RngState& rng_state,
                                 size_t src_scale,
                                 size_t dst_scale,
                                 size_t num_edges,
                                 double a = 0.57,
                                 double b = 0.19,
                                 double c = 0.19);

enum class generator_distribution_t { POWER_LAW = 0, UNIFORM };

/**
Expand Down Expand Up @@ -408,29 +447,49 @@ symmetrize_edgelist_from_triangular(
std::optional<rmm::device_uvector<weight_t>>&& optional_d_weights_v,
bool check_diagonal = false);

/**
 * @brief scramble vertex IDs in a graph
 *
 * Given a vertex list for a graph, scramble the input vertex IDs.
 *
 * The scramble code here follows the algorithm in the Graph 500 reference
 * implementation version 3.0.0.
 *
 * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param vertices Vector of input vertices (taken by rvalue reference; the input vector is
 * consumed by this function).
 * @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN).
 * @return rmm::device_uvector object storing scrambled vertex IDs.
 */
template <typename vertex_t>
rmm::device_uvector<vertex_t> scramble_vertex_ids(raft::handle_t const& handle,
                                                  rmm::device_uvector<vertex_t>&& vertices,
                                                  size_t lgN);

/**
* @brief scramble vertex ids in a graph
*
* Given an edgelist for a graph, scramble all vertex ids by the given offset.
* This translation is done in place.
* Given an edge list for a graph, scramble the input vertex IDs.
*
* The scramble code here follows the algorithm in the Graph 500 reference
* implementation version 3.0.0.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param d_src_v Vector of source vertices
* @param d_dst_v Vector of destination vertices
* @param vertex_id_offset Offset to add to each vertex id
* @param seed Used to initialize random number generator
* @param d_src_v Vector of input source vertices
* @param d_dst_v Vector of input destination vertices
* @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN).
* @return Tuple of two rmm::device_uvector objects storing scrambled source & destination vertex
* IDs, respectively.
*/
template <typename vertex_t>
void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>& d_src_v,
rmm::device_uvector<vertex_t>& d_dst_v,
vertex_t vertex_id_offset,
uint64_t seed = 0);
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& srcs,
rmm::device_uvector<vertex_t>&& dsts,
size_t lgN);

/**
* @brief Combine edgelists from multiple sources into a single edgelist
Expand Down
141 changes: 141 additions & 0 deletions cpp/src/generators/generate_bipartite_rmat_edgelist.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cugraph/detail/utility_wrappers.hpp>
#include <cugraph/graph_generators.hpp>
#include <cugraph/utilities/error.hpp>

#include <raft/core/handle.hpp>
#include <raft/random/rng.cuh>

#include <rmm/device_uvector.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <rmm/detail/error.hpp>
#include <tuple>

namespace cugraph {

// Generate a bipartite R-mat edge list: srcs in [0, 2^src_scale), dsts in [0, 2^dst_scale).
// One bit of src and one bit of dst are drawn per recursion level, each consuming one uniform
// random number; edges are generated in bounded-size batches to limit the size of the random
// number buffer. Multi-edges are allowed (no de-duplication is performed).
template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>
generate_bipartite_rmat_edgelist(raft::handle_t const& handle,
                                 raft::random::RngState& rng_state,
                                 size_t src_scale,
                                 size_t dst_scale,
                                 size_t num_edges,
                                 double a,
                                 double b,
                                 double c)
{
  // the largest generated vertex ID (2^scale - 1) must be representable in vertex_t
  CUGRAPH_EXPECTS(
    (size_t{1} << src_scale) <= static_cast<size_t>(std::numeric_limits<vertex_t>::max()),
    "Invalid input argument: src_scale too large for vertex_t.");
  CUGRAPH_EXPECTS(
    (size_t{1} << dst_scale) <= static_cast<size_t>(std::numeric_limits<vertex_t>::max()),
    "Invalid input argument: dst_scale too large for vertex_t.");
  CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0),
                  "Invalid input argument: a, b, c should be non-negative and a + b + c should not "
                  "be larger than 1.0.");

  // to limit memory footprint (1024 is a tuning parameter)
  auto max_edges_to_generate_per_iteration =
    static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * 1024;
  // each edge consumes at most (src_scale + dst_scale) uniform random numbers (one per generated
  // bit); size the buffer for one batch
  rmm::device_uvector<float> rands(
    std::min(num_edges, max_edges_to_generate_per_iteration) * (src_scale + dst_scale),
    handle.get_stream());

  rmm::device_uvector<vertex_t> srcs(num_edges, handle.get_stream());
  rmm::device_uvector<vertex_t> dsts(num_edges, handle.get_stream());

  size_t num_edges_generated{0};
  while (num_edges_generated < num_edges) {
    auto num_edges_to_generate =
      std::min(num_edges - num_edges_generated, max_edges_to_generate_per_iteration);
    // output position for this batch within the (srcs, dsts) buffers
    auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())) +
                      num_edges_generated;

    // fill rands with uniform values in [0.0f, 1.0f) for this batch
    detail::uniform_random_fill(handle.get_stream(),
                                rands.data(),
                                num_edges_to_generate * (src_scale + dst_scale),
                                0.0f,
                                1.0f,
                                rng_state);

    thrust::transform(
      handle.get_thrust_policy(),
      thrust::make_counting_iterator(size_t{0}),
      thrust::make_counting_iterator(num_edges_to_generate),
      pair_first,
      // if a + b == 0.0, a_norm is irrelevant, if (1.0 - (a+b)) == 0.0, c_norm is irrelevant
      [src_scale,
       dst_scale,
       rands    = rands.data(),
       a_plus_b = a + b,
       a_plus_c = a + c,
       a_norm   = (a + b) > 0.0 ? a / (a + b) : 0.0,
       c_norm   = (1.0 - (a + b)) > 0.0 ? c / (1.0 - (a + b)) : 0.0] __device__(auto i) {
        vertex_t src{0};
        vertex_t dst{0};
        // each thread reads its own contiguous slice of the random number buffer
        size_t rand_offset = i * (src_scale + dst_scale);
        for (int level = 0; level < static_cast<int>(std::max(src_scale, dst_scale)); ++level) {
          // default dst-bit threshold when no src bit is drawn at this level (src_scale <
          // dst_scale): P(dst bit = 1) = 1 - (a + c)
          auto dst_threshold = a_plus_c;
          if (level < src_scale) {
            auto r           = rands[rand_offset++];
            // src bit is 1 with probability 1 - (a + b) (= c + d)
            auto src_bit_set = r > a_plus_b;
            src +=
              src_bit_set ? static_cast<vertex_t>(vertex_t{1} << (src_scale - (level + 1))) : 0;
            // condition the dst bit on the src bit: quadrants (src,dst) = (0,0)/(0,1)/(1,0)/(1,1)
            // have probabilities a/b/c/d, so P(dst = 1 | src = 0) = b / (a + b) and
            // P(dst = 1 | src = 1) = d / (c + d)
            dst_threshold = src_bit_set ? c_norm : a_norm;
          }
          if (level < dst_scale) {
            auto r           = rands[rand_offset++];
            auto dst_bit_set = r > dst_threshold;
            dst +=
              dst_bit_set ? static_cast<vertex_t>(vertex_t{1} << (dst_scale - (level + 1))) : 0;
          }
        }
        return thrust::make_tuple(src, dst);
      });
    num_edges_generated += num_edges_to_generate;
  }

  return std::make_tuple(std::move(srcs), std::move(dsts));
}

// explicit template instantiations for the supported vertex ID types (int32_t, int64_t)
template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
generate_bipartite_rmat_edgelist<int32_t>(raft::handle_t const& handle,
                                          raft::random::RngState& rng_state,
                                          size_t src_scale,
                                          size_t dst_scale,
                                          size_t num_edges,
                                          double a,
                                          double b,
                                          double c);

template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>
generate_bipartite_rmat_edgelist<int64_t>(raft::handle_t const& handle,
                                          raft::random::RngState& rng_state,
                                          size_t src_scale,
                                          size_t dst_scale,
                                          size_t num_edges,
                                          double a,
                                          double b,
                                          double c);

} // namespace cugraph
83 changes: 49 additions & 34 deletions cpp/src/generators/generator_tools.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -43,10 +43,10 @@ template <typename T>
rmm::device_uvector<T> append_all(raft::handle_t const& handle,
std::vector<rmm::device_uvector<T>>&& input)
{
size_t size{0};
// for (size_t i = 0; i < input.size(); ++i) size += input[i].size();
for (auto& element : input)
size += element.size();
auto size = std::transform_reduce(
input.begin(), input.end(), size_t{0}, std::plus<size_t>{}, [](auto const& element) {
return element.size();
});

rmm::device_uvector<T> output(size, handle.get_stream());
auto output_iter = output.begin();
Expand All @@ -56,36 +56,43 @@ rmm::device_uvector<T> append_all(raft::handle_t const& handle,
output_iter += element.size();
}

/*
for (size_t i = 0; i < input.size(); ++i) {
raft::copy(output_iter, input[i].begin(), input[i].size(), handle.get_stream());
output_iter += input[i].size();
}
*/

return output;
}

} // namespace detail

template <typename vertex_t>
void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>& d_src_v,
rmm::device_uvector<vertex_t>& d_dst_v,
vertex_t vertex_id_offset,
uint64_t seed)
rmm::device_uvector<vertex_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& vertices,
size_t lgN)
{
vertex_t scale = 1 + raft::log2(d_src_v.size());
thrust::transform(handle.get_thrust_policy(),
vertices.begin(),
vertices.end(),
vertices.begin(),
[lgN] __device__(auto v) { return detail::scramble(v, lgN); });

auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin()));
return std::move(vertices);
}

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& srcs,
rmm::device_uvector<vertex_t>&& dsts,
size_t lgN)
{
auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin()));
thrust::transform(handle.get_thrust_policy(),
pair_first,
pair_first + d_src_v.size(),
pair_first + srcs.size(),
pair_first,
[scale] __device__(auto pair) {
return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale),
detail::scramble(thrust::get<1>(pair), scale));
[lgN] __device__(auto pair) {
return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), lgN),
detail::scramble(thrust::get<1>(pair), lgN));
});

return std::make_tuple(std::move(srcs), std::move(dsts));
}

template <typename vertex_t, typename weight_t>
Expand Down Expand Up @@ -250,17 +257,25 @@ symmetrize_edgelist_from_triangular(
optional_d_weights_v ? std::move(optional_d_weights_v) : std::nullopt);
}

template void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int32_t>& d_src_v,
rmm::device_uvector<int32_t>& d_dst_v,
int32_t vertex_id_offset,
uint64_t seed);

template void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int64_t>& d_src_v,
rmm::device_uvector<int64_t>& d_dst_v,
int64_t vertex_id_offset,
uint64_t seed);
template rmm::device_uvector<int32_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& vertices,
size_t lgN);

template rmm::device_uvector<int64_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& vertices,
size_t lgN);

template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& srcs,
rmm::device_uvector<int32_t>&& dsts,
size_t lgN);

template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& srcs,
rmm::device_uvector<int64_t>&& dsts,
size_t lgN);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
Expand Down
Loading