Bipartite R-mat graph generation. (#3512)
Addresses #2075

This function will generate (source, destination) vertex ID pairs. Source vertex IDs will have values in `[0, 2^src_scale)` and destination vertex IDs will have values in `[0, 2^dst_scale)`.
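
A minimal usage sketch (illustrative only, not part of this commit; it assumes an initialized `raft::handle_t` and follows the signature declared in `cpp/include/cugraph/graph_generators.hpp` below):

```cpp
#include <cugraph/graph_generators.hpp>

#include <raft/core/handle.hpp>
#include <raft/random/rng_state.hpp>

int main()
{
  raft::handle_t handle;
  raft::random::RngState rng_state(0 /* seed */);

  // 2^20 edges between 2^16 source vertices and 2^12 destination vertices,
  // using the Graph 500 default a = 0.57, b = 0.19, c = 0.19 (d = 0.05).
  auto [srcs, dsts] = cugraph::generate_bipartite_rmat_edgelist<int64_t>(
    handle, rng_state, 16 /* src_scale */, 12 /* dst_scale */, size_t{1} << 20);
  // srcs[i] is in [0, 2^16) and dsts[i] is in [0, 2^12); multi-edges may occur.

  return 0;
}
```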

Additionally,

* The `scramble_vertex_ids` function had unused input parameters and internally derived the scale from the edge count, which was erroneous. Fixed this bug.

* `Rmat_Usecase` was ignoring the `scramble_vertex_ids` flag. Fixed this bug.

* Added a `scramble_vertex_ids` overload that takes just a single vertex list (instead of a src, dst pair).

* Updated `scramble_vertex_ids` to take its input vectors as r-values and return the scrambled vectors (instead of taking in/out parameters); see the sketch after this list.
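
For example, continuing the sketch above (so `srcs` and `dsts` hold the bipartite edge list just generated), the new value-based API looks like this:

```cpp
// Scramble each side separately with the new single-list overload; the source
// IDs are in [0, 2^16) while the destination IDs are in [0, 2^12).
srcs = cugraph::scramble_vertex_ids(handle, std::move(srcs), 16 /* lgN */);
dsts = cugraph::scramble_vertex_ids(handle, std::move(dsts), 12 /* lgN */);

// For a square R-mat edge list (both sides in [0, 2^scale)), the pair overload
// scrambles sources and destinations in one call:
//   std::tie(srcs, dsts) =
//     cugraph::scramble_vertex_ids(handle, std::move(srcs), std::move(dsts), scale);
```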

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Naim (https://github.com/naimnv)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Brad Rees (https://github.com/BradReesWork)

URL: #3512
seunghwak authored May 1, 2023
1 parent 5d739af commit e271bad
Showing 10 changed files with 771 additions and 135 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -231,6 +231,7 @@ set(CUGRAPH_SOURCES
src/components/legacy/connectivity.cu
src/centrality/legacy/betweenness_centrality.cu
src/generators/generate_rmat_edgelist.cu
src/generators/generate_bipartite_rmat_edgelist.cu
src/generators/generator_tools.cu
src/generators/simple_generators.cu
src/generators/erdos_renyi_generator.cu
81 changes: 70 additions & 11 deletions cpp/include/cugraph/graph_generators.hpp
@@ -127,6 +127,45 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> generat
double c = 0.19,
bool clip_and_flip = false);

/**
* @brief generate an edge list for a bipartite R-mat graph.
*
* The source vertex IDs will be in the range of [0, 2^src_scale) and the destination vertex IDs
* will be in the range of [0, 2^dst_scale). This function allows multi-edges.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param rng_state RAFT RNG state, updated with each call.
* @param src_scale Scale factor to set the range of source vertex IDs (or the first vertex set) in
* the bipartite graph. Vertex IDs have values in [0, V_src), where V_src = 1 << @p src_scale.
* @param dst_scale Scale factor to set the range of destination vertex IDs (or the second vertex
* set) in the bipartite graph. Vertex IDs have values in [0, V_dst), where V_dst = 1 << @p
* dst_scale.
* @param num_edges Number of edges to generate.
* @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org
* for additional details). a, b, c, d should be non-negative and a + b + c should be no larger
* than 1.0.
* @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org
* for additional details). a, b, c, d should be non-negative and a + b + c should be no larger
* than 1.0.
* @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit https://graph500.org
* for additional details). a, b, c, d should be non-negative and a + b + c should be no larger
* than 1.0.
* @return std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> A tuple of
* rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs.
*/
template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>
generate_bipartite_rmat_edgelist(raft::handle_t const& handle,
raft::random::RngState& rng_state,
size_t src_scale,
size_t dst_scale,
size_t num_edges,
double a = 0.57,
double b = 0.19,
double c = 0.19);

enum class generator_distribution_t { POWER_LAW = 0, UNIFORM };

/**
@@ -408,29 +447,49 @@ symmetrize_edgelist_from_triangular(
std::optional<rmm::device_uvector<weight_t>>&& optional_d_weights_v,
bool check_diagonal = false);

/**
* @brief scramble vertex IDs in a graph
*
* Given a vertex list for a graph, scramble the input vertex IDs.
*
* The scramble code here follows the algorithm in the Graph 500 reference
* implementation version 3.0.0.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param vertices Vector of input vertices
* @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN).
* @return rmm::device_uvector object storing scrambled vertex IDs.
*/
template <typename vertex_t>
rmm::device_uvector<vertex_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& vertices,
size_t lgN);
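
// Example (illustrative only, not part of this header): scramble the two sides
// of a bipartite edge list separately, as the source and destination ID ranges
// may differ:
//   srcs = scramble_vertex_ids(handle, std::move(srcs), src_scale);
//   dsts = scramble_vertex_ids(handle, std::move(dsts), dst_scale);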

/**
* @brief scramble vertex IDs in a graph
*
* Given an edgelist for a graph, scramble all vertex ids by the given offset.
* This translation is done in place.
* Given an edge list for a graph, scramble the input vertex IDs.
*
* The scramble code here follows the algorithm in the Graph 500 reference
* implementation version 3.0.0.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param d_src_v Vector of source vertices
* @param d_dst_v Vector of destination vertices
* @param vertex_id_offset Offset to add to each vertex id
* @param seed Used to initialize random number generator
* @param d_src_v Vector of input source vertices
* @param d_dst_v Vector of input destination vertices
* @param lgN The input & output (scrambled) vertex IDs are assumed to be in [0, 2^lgN).
* @return Tuple of two rmm::device_uvector objects storing scrambled source & destination vertex
* IDs, respectively.
*/
template <typename vertex_t>
void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>& d_src_v,
rmm::device_uvector<vertex_t>& d_dst_v,
vertex_t vertex_id_offset,
uint64_t seed = 0);
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& srcs,
rmm::device_uvector<vertex_t>&& dsts,
size_t lgN);
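
// Example (illustrative only, not part of this header): scramble a square R-mat
// edge list whose vertex IDs fall in [0, 2^scale):
//   std::tie(srcs, dsts) =
//     scramble_vertex_ids(handle, std::move(srcs), std::move(dsts), scale);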

/**
* @brief Combine edgelists from multiple sources into a single edgelist
141 changes: 141 additions & 0 deletions cpp/src/generators/generate_bipartite_rmat_edgelist.cu
@@ -0,0 +1,141 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cugraph/detail/utility_wrappers.hpp>
#include <cugraph/graph_generators.hpp>
#include <cugraph/utilities/error.hpp>

#include <raft/core/handle.hpp>
#include <raft/random/rng.cuh>

#include <rmm/device_uvector.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <rmm/detail/error.hpp>
#include <tuple>

namespace cugraph {

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>>
generate_bipartite_rmat_edgelist(raft::handle_t const& handle,
raft::random::RngState& rng_state,
size_t src_scale,
size_t dst_scale,
size_t num_edges,
double a,
double b,
double c)
{
CUGRAPH_EXPECTS(
(size_t{1} << src_scale) <= static_cast<size_t>(std::numeric_limits<vertex_t>::max()),
"Invalid input argument: src_scale too large for vertex_t.");
CUGRAPH_EXPECTS(
(size_t{1} << dst_scale) <= static_cast<size_t>(std::numeric_limits<vertex_t>::max()),
"Invalid input argument: dst_scale too large for vertex_t.");
CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0),
"Invalid input argument: a, b, c should be non-negative and a + b + c should not "
"be larger than 1.0.");

// to limit memory footprint (1024 is a tuning parameter)
auto max_edges_to_generate_per_iteration =
static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * 1024;
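  // each edge consumes one uniform random draw per source bit and one per
  // destination bit, so a chunk of edges needs (src_scale + dst_scale) draws per edge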
rmm::device_uvector<float> rands(
std::min(num_edges, max_edges_to_generate_per_iteration) * (src_scale + dst_scale),
handle.get_stream());

rmm::device_uvector<vertex_t> srcs(num_edges, handle.get_stream());
rmm::device_uvector<vertex_t> dsts(num_edges, handle.get_stream());

size_t num_edges_generated{0};
while (num_edges_generated < num_edges) {
auto num_edges_to_generate =
std::min(num_edges - num_edges_generated, max_edges_to_generate_per_iteration);
auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())) +
num_edges_generated;

detail::uniform_random_fill(handle.get_stream(),
rands.data(),
num_edges_to_generate * (src_scale + dst_scale),
0.0f,
1.0f,
rng_state);

thrust::transform(
handle.get_thrust_policy(),
thrust::make_counting_iterator(size_t{0}),
thrust::make_counting_iterator(num_edges_to_generate),
pair_first,
// if a + b == 0.0, a_norm is irrelevant, if (1.0 - (a+b)) == 0.0, c_norm is irrelevant
[src_scale,
dst_scale,
rands = rands.data(),
a_plus_b = a + b,
a_plus_c = a + c,
a_norm = (a + b) > 0.0 ? a / (a + b) : 0.0,
c_norm = (1.0 - (a + b)) > 0.0 ? c / (1.0 - (a + b)) : 0.0] __device__(auto i) {
vertex_t src{0};
vertex_t dst{0};
size_t rand_offset = i * (src_scale + dst_scale);
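          // One recursion level per bit: the src draw picks the upper (probability
          // a + b) or lower (c + d) half of the adjacency matrix and selects the
          // conditional threshold (a_norm or c_norm) for the dst draw; when only
          // dst bits remain (dst_scale > src_scale), the marginal threshold a + c
          // is used instead.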
for (int level = 0; level < static_cast<int>(std::max(src_scale, dst_scale)); ++level) {
auto dst_threshold = a_plus_c;
if (level < src_scale) {
auto r = rands[rand_offset++];
auto src_bit_set = r > a_plus_b;
src +=
src_bit_set ? static_cast<vertex_t>(vertex_t{1} << (src_scale - (level + 1))) : 0;
dst_threshold = src_bit_set ? c_norm : a_norm;
}
if (level < dst_scale) {
auto r = rands[rand_offset++];
auto dst_bit_set = r > dst_threshold;
dst +=
dst_bit_set ? static_cast<vertex_t>(vertex_t{1} << (dst_scale - (level + 1))) : 0;
}
}
return thrust::make_tuple(src, dst);
});
num_edges_generated += num_edges_to_generate;
}

return std::make_tuple(std::move(srcs), std::move(dsts));
}

template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
generate_bipartite_rmat_edgelist<int32_t>(raft::handle_t const& handle,
raft::random::RngState& rng_state,
size_t src_scale,
size_t dst_scale,
size_t num_edges,
double a,
double b,
double c);

template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>
generate_bipartite_rmat_edgelist<int64_t>(raft::handle_t const& handle,
raft::random::RngState& rng_state,
size_t src_scale,
size_t dst_scale,
size_t num_edges,
double a,
double b,
double c);

} // namespace cugraph
83 changes: 49 additions & 34 deletions cpp/src/generators/generator_tools.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -43,10 +43,10 @@ template <typename T>
rmm::device_uvector<T> append_all(raft::handle_t const& handle,
std::vector<rmm::device_uvector<T>>&& input)
{
size_t size{0};
// for (size_t i = 0; i < input.size(); ++i) size += input[i].size();
for (auto& element : input)
size += element.size();
auto size = std::transform_reduce(
input.begin(), input.end(), size_t{0}, std::plus<size_t>{}, [](auto const& element) {
return element.size();
});

rmm::device_uvector<T> output(size, handle.get_stream());
auto output_iter = output.begin();
@@ -56,36 +56,43 @@ rmm::device_uvector<T> append_all(raft::handle_t const& handle,
output_iter += element.size();
}

/*
for (size_t i = 0; i < input.size(); ++i) {
raft::copy(output_iter, input[i].begin(), input[i].size(), handle.get_stream());
output_iter += input[i].size();
}
*/

return output;
}

} // namespace detail

template <typename vertex_t>
void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>& d_src_v,
rmm::device_uvector<vertex_t>& d_dst_v,
vertex_t vertex_id_offset,
uint64_t seed)
rmm::device_uvector<vertex_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& vertices,
size_t lgN)
{
vertex_t scale = 1 + raft::log2(d_src_v.size());
thrust::transform(handle.get_thrust_policy(),
vertices.begin(),
vertices.end(),
vertices.begin(),
[lgN] __device__(auto v) { return detail::scramble(v, lgN); });

auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin()));
return std::move(vertices);
}

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& srcs,
rmm::device_uvector<vertex_t>&& dsts,
size_t lgN)
{
auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin()));
thrust::transform(handle.get_thrust_policy(),
pair_first,
pair_first + d_src_v.size(),
pair_first + srcs.size(),
pair_first,
[scale] __device__(auto pair) {
return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale),
detail::scramble(thrust::get<1>(pair), scale));
[lgN] __device__(auto pair) {
return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), lgN),
detail::scramble(thrust::get<1>(pair), lgN));
});

return std::make_tuple(std::move(srcs), std::move(dsts));
}

template <typename vertex_t, typename weight_t>
@@ -250,17 +257,25 @@ symmetrize_edgelist_from_triangular(
optional_d_weights_v ? std::move(optional_d_weights_v) : std::nullopt);
}

template void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int32_t>& d_src_v,
rmm::device_uvector<int32_t>& d_dst_v,
int32_t vertex_id_offset,
uint64_t seed);

template void scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int64_t>& d_src_v,
rmm::device_uvector<int64_t>& d_dst_v,
int64_t vertex_id_offset,
uint64_t seed);
template rmm::device_uvector<int32_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& vertices,
size_t lgN);

template rmm::device_uvector<int64_t> scramble_vertex_ids(raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& vertices,
size_t lgN);

template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& srcs,
rmm::device_uvector<int32_t>&& dsts,
size_t lgN);

template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>> scramble_vertex_ids(
raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& srcs,
rmm::device_uvector<int64_t>&& dsts,
size_t lgN);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,