diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index f724caa791c..5e5562c4095 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -1445,7 +1445,8 @@ enum class k_core_degree_type_t { IN, OUT, INOUT }; /** * @brief Compute core numbers of individual vertices from K-core decomposition. * - * The input graph should not have self-loops nor multi-edges. + * The input graph should not have self-loops nor multi-edges. Currently, only undirected graphs are + * supported. * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. @@ -1460,14 +1461,14 @@ enum class k_core_degree_type_t { IN, OUT, INOUT }; * out-degrees, or in-degrees + out_degrees. * @param k_first Find K-cores from K = k_first. Any vertices that do not belong to k_first-core * will have core numbers of 0. - * @param k_last Find K-cores to K = k_last. Any vertices that belong to (k_last + 1) core will have - * core numbers of k_last. + * @param k_last Find K-cores to K = k_last. Any vertices that belong to (k_last)-core will have + * their core numbers set to their degrees on k_last-core. * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template void core_number(raft::handle_t const& handle, graph_view_t const& graph_view, - vertex_t* core_numbers, + edge_t* core_numbers, k_core_degree_type_t degree_type, size_t k_first = 0, size_t k_last = std::numeric_limits::max(), diff --git a/cpp/include/cugraph/prims/reduce_op.cuh b/cpp/include/cugraph/prims/reduce_op.cuh index 518ec8ec158..8807ba944ea 100644 --- a/cpp/include/cugraph/prims/reduce_op.cuh +++ b/cpp/include/cugraph/prims/reduce_op.cuh @@ -16,6 +16,8 @@ #pragma once +#include + namespace cugraph { namespace reduce_op { @@ -55,5 +57,21 @@ struct min { } }; +// FIXME: thrust::plus can replace this. 
+// reducing N elements (operator < should be defined between any two elements), the minimum element +// should be selected. +template +struct plus { + using type = T; + // FIXME: actually every reduction operation should be side-effect free if reduction is performed + // by thrust; thrust reduction call rounds up the number of invocations based on the block size + // and discards the values outside the valid range; this does not work if the reduction operation + // has side-effects. + static constexpr bool pure_function = true; // this can be called in any process + property_op op{}; + + __host__ __device__ T operator()(T const& lhs, T const& rhs) const { return op(lhs, rhs); } +}; + } // namespace reduce_op } // namespace cugraph diff --git a/cpp/src/cores/core_number_impl.cuh b/cpp/src/cores/core_number_impl.cuh index d72eaa3dc4c..049a238a3cd 100644 --- a/cpp/src/cores/core_number_impl.cuh +++ b/cpp/src/cores/core_number_impl.cuh @@ -17,6 +17,11 @@ #include #include +#include +#include +#include +#include +#include #include #include @@ -25,16 +30,257 @@ namespace cugraph { +namespace { + +// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used +template +struct v_to_core_number_t { + edge_t const* core_numbers{nullptr}; + vertex_t v_first{0}; + + __device__ edge_t operator()(vertex_t v) const { return core_numbers[v - v_first]; } +}; + +// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used +template +struct mult_degree_by_two_t { + __device__ edge_t operator()(edge_t d) const { return d * edge_t{2}; } +}; + +} // namespace + template void core_number(raft::handle_t const& handle, graph_view_t const& graph_view, - vertex_t* core_numbers, + edge_t* core_numbers, k_core_degree_type_t degree_type, size_t k_first, size_t k_last, bool do_expensive_check) { - CUGRAPH_FAIL("unimplemented."); + // check input arguments. 
+ + CUGRAPH_EXPECTS(graph_view.is_symmetric(), + "Invalid input argument: core_number currently supports only undirected graphs."); + CUGRAPH_EXPECTS((degree_type == k_core_degree_type_t::IN) || + (degree_type == k_core_degree_type_t::OUT) || + (degree_type == k_core_degree_type_t::INOUT), + "Invalid input argument: degree_type should be IN, OUT, or INOUT."); + CUGRAPH_EXPECTS(k_first <= k_last, "Invalid input argument: k_first <= k_last."); + + if (do_expensive_check) { + CUGRAPH_EXPECTS(graph_view.count_self_loops(handle) == 0, + "Invalid input argument: graph_view has self-loops."); + if (graph_view.is_multigraph()) { + CUGRAPH_EXPECTS(graph_view.count_multi_edges(handle) == 0, + "Invalid input argument: graph_view has multi-edges."); + } + } + + // initialize core_numbers to degrees + + if (graph_view.is_symmetric()) { // in-degree == out-degree + auto out_degrees = graph_view.compute_out_degrees(handle); + if ((degree_type == k_core_degree_type_t::IN) || (degree_type == k_core_degree_type_t::OUT)) { + thrust::copy( + handle.get_thrust_policy(), out_degrees.begin(), out_degrees.end(), core_numbers); + } else { + auto inout_degree_first = + thrust::make_transform_iterator(out_degrees.begin(), mult_degree_by_two_t{}); + thrust::copy(handle.get_thrust_policy(), + inout_degree_first, + inout_degree_first + out_degrees.size(), + core_numbers); + } + } else { + if (degree_type == k_core_degree_type_t::IN) { + auto in_degrees = graph_view.compute_in_degrees(handle); + thrust::copy(handle.get_thrust_policy(), in_degrees.begin(), in_degrees.end(), core_numbers); + } else if (degree_type == k_core_degree_type_t::OUT) { + auto out_degrees = graph_view.compute_out_degrees(handle); + thrust::copy( + handle.get_thrust_policy(), out_degrees.begin(), out_degrees.end(), core_numbers); + } else { + auto in_degrees = graph_view.compute_in_degrees(handle); + auto out_degrees = graph_view.compute_out_degrees(handle); + auto degree_pair_first = + 
thrust::make_zip_iterator(thrust::make_tuple(in_degrees.begin(), out_degrees.begin())); + thrust::transform(handle.get_thrust_policy(), + degree_pair_first, + degree_pair_first + in_degrees.size(), + core_numbers, + [] __device__(auto p) { return thrust::get<0>(p) + thrust::get<1>(p); }); + } + } + + // remove 0 degree vertices (as they already belong to 0-core and they don't affect core numbers) + // and clip core numbers of the "less than k_first degree" vertices to 0 + + rmm::device_uvector remaining_vertices(graph_view.get_number_of_local_vertices(), + handle.get_stream()); + remaining_vertices.resize( + thrust::distance( + remaining_vertices.begin(), + thrust::copy_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(graph_view.get_local_vertex_last()), + remaining_vertices.begin(), + [core_numbers, v_first = graph_view.get_local_vertex_first()] __device__( + auto v) { return core_numbers[v - v_first] > edge_t{0}; })), + handle.get_stream()); + + if (k_first > 1) { + thrust::for_each( + handle.get_thrust_policy(), + remaining_vertices.begin(), + remaining_vertices.end(), + [k_first, core_numbers, v_first = graph_view.get_local_vertex_first()] __device__(auto v) { + if (core_numbers[v - v_first] < k_first) { core_numbers[v - v_first] = edge_t{0}; } + }); + } + + // start iteration + + enum class Bucket { cur, next, num_buckets }; + VertexFrontier(Bucket::num_buckets)> + vertex_frontier(handle); + + col_properties_t, edge_t> + dst_core_numbers(handle, graph_view); + copy_to_adj_matrix_col(handle, graph_view, core_numbers, dst_core_numbers); + + auto k = std::max(k_first, size_t{2}); // degree 0|1 vertices belong to 0|1-core + if (graph_view.is_symmetric() && (degree_type == k_core_degree_type_t::INOUT) && + ((k % 2) == 1)) { // core numbers are always even numbers if symmetric and INOUT + ++k; + } + while (k <= k_last) { + size_t aggregate_num_remaining_vertices{0}; + if constexpr 
(multi_gpu) { + auto& comm = handle.get_comms(); + aggregate_num_remaining_vertices = host_scalar_allreduce( + comm, remaining_vertices.size(), raft::comms::op_t::SUM, handle.get_stream()); + } else { + aggregate_num_remaining_vertices = remaining_vertices.size(); + } + if (aggregate_num_remaining_vertices == 0) { break; } + + // FIXME: scanning the remaining vertices can add significant overhead if the number of distinct + // core numbers in [k_first, std::min(max_degree, k_last)] is large and there are many high core + // number vertices (so the number of remaining vertices remains large for many iterations). Need + // more tuning (e.g. Possibly use a logarithmic binning) if we encounter such use cases. + auto less_than_k_first = thrust::stable_partition( + handle.get_thrust_policy(), + remaining_vertices.begin(), + remaining_vertices.end(), + [core_numbers, k, v_first = graph_view.get_local_vertex_first()] __device__(auto v) { + return core_numbers[v - v_first] >= k; + }); + vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .insert(less_than_k_first, remaining_vertices.end()); + remaining_vertices.resize(thrust::distance(remaining_vertices.begin(), less_than_k_first), + handle.get_stream()); + + auto delta = (graph_view.is_symmetric() && (degree_type == k_core_degree_type_t::INOUT)) + ? edge_t{2} + : edge_t{1}; + if (vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() > 0) { + do { + // FIXME: If most vertices have core numbers less than k, (dst_val >= k) will be mostly + // false leading to too many unnecessary edge traversals (this is especially problematic if + // the number of distinct core numbers in [k_first, std::min(max_degree, k_last)] is large). + // There are two potential solutions: 1) extract a sub-graph and work on the sub-graph & 2) + // mask-out/delete edges. 
+ if (graph_view.is_symmetric() || ((degree_type == k_core_degree_type_t::IN) || + (degree_type == k_core_degree_type_t::INOUT))) { + update_frontier_v_push_if_out_nbr( + handle, + graph_view, + vertex_frontier, + static_cast(Bucket::cur), + std::vector{static_cast(Bucket::next)}, + dummy_properties_t{}.device_view(), + dst_core_numbers.device_view(), + [k, delta] __device__(vertex_t src, vertex_t dst, auto, auto dst_val) { + return dst_val >= k ? thrust::optional{delta} : thrust::nullopt; + }, + reduce_op::plus(), + core_numbers, + core_numbers, + [k_first, k, delta, v_first = graph_view.get_local_vertex_first()] __device__( + auto v, auto v_val, auto pushed_val) { + auto new_core_number = v_val >= pushed_val ? v_val - pushed_val : edge_t{0}; + new_core_number = new_core_number < (k - delta) ? (k - delta) : new_core_number; + new_core_number = new_core_number < k_first ? edge_t{0} : new_core_number; + return thrust::optional>{ + thrust::make_tuple(static_cast(Bucket::next), new_core_number)}; + }); + } + + if (!graph_view.is_symmetric() && ((degree_type == k_core_degree_type_t::OUT) || + (degree_type == k_core_degree_type_t::INOUT))) { + // FIXME: we can create a transposed copy of the input graph (note that currently, + // transpose works only on graph_t (and does not work on graph_view_t)). 
+ CUGRAPH_FAIL("unimplemented."); + } + + copy_to_adj_matrix_col( + handle, + graph_view, + vertex_frontier.get_bucket(static_cast(Bucket::next)).begin(), + vertex_frontier.get_bucket(static_cast(Bucket::next)).end(), + core_numbers, + dst_core_numbers); + + vertex_frontier.get_bucket(static_cast(Bucket::next)) + .resize(static_cast(thrust::distance( + vertex_frontier.get_bucket(static_cast(Bucket::next)).begin(), + thrust::remove_if( + handle.get_thrust_policy(), + vertex_frontier.get_bucket(static_cast(Bucket::next)).begin(), + vertex_frontier.get_bucket(static_cast(Bucket::next)).end(), + [core_numbers, k, v_first = graph_view.get_local_vertex_first()] __device__(auto v) { + return core_numbers[v - v_first] >= k; + })))); + vertex_frontier.get_bucket(static_cast(Bucket::next)).shrink_to_fit(); + + vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); + vertex_frontier.get_bucket(static_cast(Bucket::cur)).shrink_to_fit(); + vertex_frontier.swap_buckets(static_cast(Bucket::cur), + static_cast(Bucket::next)); + } while (vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() > 0); + + // FIXME: scanning the remaining vertices can add significant overhead if the number of + // distinct core numbers in [k_first, std::min(max_degree, k_last)] is large and there are + // many high core number vertices (so the number of remaining vertices remains large for many + // iterations). Need more tuning (e.g. Possibly use a logarithmic binning) if we encounter + // such use cases. 
+ remaining_vertices.resize( + thrust::distance( + remaining_vertices.begin(), + thrust::remove_if( + handle.get_thrust_policy(), + remaining_vertices.begin(), + remaining_vertices.end(), + [core_numbers, k, v_first = graph_view.get_local_vertex_first()] __device__(auto v) { + return core_numbers[v - v_first] < k; + })), + handle.get_stream()); + k += delta; + } else { + auto remaining_vertex_core_number_first = thrust::make_transform_iterator( + remaining_vertices.begin(), + v_to_core_number_t{core_numbers, graph_view.get_local_vertex_first()}); + auto min_core_number = + reduce_v(handle, + graph_view, + remaining_vertex_core_number_first, + remaining_vertex_core_number_first + remaining_vertices.size(), + std::numeric_limits::max(), + raft::comms::op_t::MIN); + k = std::max(k + delta, static_cast(min_core_number + edge_t{delta})); + } + } } } // namespace cugraph diff --git a/cpp/src/cores/core_number_mg.cu b/cpp/src/cores/core_number_mg.cu index e078e535eca..3471417075a 100644 --- a/cpp/src/cores/core_number_mg.cu +++ b/cpp/src/cores/core_number_mg.cu @@ -38,7 +38,7 @@ template void core_number(raft::handle_t const& handle, template void core_number(raft::handle_t const& handle, graph_view_t const& graph_view, - int32_t* core_numbers, + int64_t* core_numbers, k_core_degree_type_t degree_type, size_t k_first, size_t k_last, @@ -46,7 +46,7 @@ template void core_number(raft::handle_t const& handle, template void core_number(raft::handle_t const& handle, graph_view_t const& graph_view, - int32_t* core_numbers, + int64_t* core_numbers, k_core_degree_type_t degree_type, size_t k_first, size_t k_last, diff --git a/cpp/src/cores/core_number_sg.cu b/cpp/src/cores/core_number_sg.cu index 745b5979797..19667fa4535 100644 --- a/cpp/src/cores/core_number_sg.cu +++ b/cpp/src/cores/core_number_sg.cu @@ -38,7 +38,7 @@ template void core_number(raft::handle_t const& handle, template void core_number(raft::handle_t const& handle, graph_view_t const& graph_view, - int32_t* 
core_numbers, + int64_t* core_numbers, k_core_degree_type_t degree_type, size_t k_first, size_t k_last, @@ -46,7 +46,7 @@ template void core_number(raft::handle_t const& handle, template void core_number(raft::handle_t const& handle, graph_view_t const& graph_view, - int32_t* core_numbers, + int64_t* core_numbers, k_core_degree_type_t degree_type, size_t k_first, size_t k_last, diff --git a/cpp/src/structure/graph_impl.cuh b/cpp/src/structure/graph_impl.cuh index ed6b8addbd8..222b51ed9cf 100644 --- a/cpp/src/structure/graph_impl.cuh +++ b/cpp/src/structure/graph_impl.cuh @@ -1270,10 +1270,7 @@ graph_t vertex_partition_lasts(comm_size); - for (int i = 0; i < comm_size; ++i) { - vertex_partition_lasts[i] = graph_view.get_vertex_partition_last(i); - } + auto vertex_partition_lasts = graph_view.get_vertex_partition_lasts(); rmm::device_uvector edgelist_majors(number_of_local_edges, handle.get_stream()); rmm::device_uvector edgelist_minors(edgelist_majors.size(), handle.get_stream()); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 829ee9f0927..3880ccd04b0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -438,6 +438,10 @@ ConfigureTest(BFS_VISITOR_TEST visitors/bfs_test.cpp) # - Random Walks Visitor tests -------------------------------------------------------------------- ConfigureTest(RANDOM_WALKS_VISITOR_TEST visitors/rw_test.cu) +################################################################################################### +# - Core Number tests ----------------------------------------------------------------------------- +ConfigureTest(CORE_NUMBER_TEST cores/core_number_test.cpp) + ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- @@ -513,6 +517,10 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG GRAPH BROADCAST tests -------------------------------------------------------------- 
ConfigureTestMG(MG_GRAPH_BROADCAST_TEST bcast/mg_graph_bcast.cpp) + ########################################################################################### + # - MG Core Number tests ------------------------------------------------------------------ + ConfigureTestMG(MG_CORE_NUMBER_TEST cores/mg_core_number_test.cpp) + ########################################################################################### # - MG PRIMS COUNT_IF_V tests ------------------------------------------------------------- ConfigureTestMG(MG_COUNT_IF_V_TEST prims/mg_count_if_v.cu) diff --git a/cpp/tests/cores/core_number_test.cpp b/cpp/tests/cores/core_number_test.cpp new file mode 100644 index 00000000000..647ef83b88d --- /dev/null +++ b/cpp/tests/cores/core_number_test.cpp @@ -0,0 +1,400 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +// self-loops and multi-edges are masked out and do not participate in degree computation, this code +// assumes that every vertex's neighbor list is sorted. 
+template +std::vector core_number_reference(edge_t const* offsets, + vertex_t const* indices, + vertex_t num_vertices, + cugraph::k_core_degree_type_t degree_type, + size_t k_first = 0, + size_t k_last = std::numeric_limits::max()) +{ + // mask out self-loops and multi_edges + + std::vector edge_valids(offsets[num_vertices], true); + + for (vertex_t i = 0; i < num_vertices; ++i) { + for (edge_t j = offsets[i]; j < offsets[i + 1]; j++) { + if (indices[j] == i) { + edge_valids[j] = false; + } else if ((j > offsets[i]) && (indices[j] == indices[j - 1])) { + edge_valids[j] = false; + } + } + } + + // construct the CSC representation if necessary + + std::vector csc_offsets(num_vertices + 1, edge_t{0}); + std::vector csc_indices(offsets[num_vertices], vertex_t{0}); + std::vector csc_edge_valids(offsets[num_vertices], true); + std::vector counters(num_vertices, edge_t{0}); + + for (edge_t i = 0; i < offsets[num_vertices]; ++i) { + ++counters[indices[i]]; + } + std::partial_sum(counters.begin(), counters.end(), csc_offsets.begin() + 1); + std::fill(counters.begin(), counters.end(), edge_t{0}); + for (vertex_t i = 0; i < num_vertices; ++i) { + for (edge_t j = offsets[i]; j < offsets[i + 1]; ++j) { + auto dst = indices[j]; + csc_indices[csc_offsets[dst] + counters[dst]] = i; + if (!edge_valids[j]) { csc_edge_valids[csc_offsets[dst] + counters[dst]] = false; } + ++counters[dst]; + } + } + + // initialize core_numbers to degrees + + std::vector degrees(num_vertices, edge_t{0}); + if ((degree_type == cugraph::k_core_degree_type_t::OUT) || + (degree_type == cugraph::k_core_degree_type_t::INOUT)) { + for (vertex_t i = 0; i < num_vertices; ++i) { + for (edge_t j = offsets[i]; j < offsets[i + 1]; ++j) { + if (edge_valids[j]) { ++degrees[i]; } + } + } + } + if ((degree_type == cugraph::k_core_degree_type_t::IN) || + (degree_type == cugraph::k_core_degree_type_t::INOUT)) { + for (vertex_t i = 0; i < num_vertices; ++i) { + for (edge_t j = csc_offsets[i]; j < csc_offsets[i + 1]; ++j) 
{ + if (csc_edge_valids[j]) { ++degrees[i]; } + } + } + } + std::vector core_numbers = std::move(degrees); + + // sort vertices based on degrees + + std::vector sorted_vertices(num_vertices); + std::iota(sorted_vertices.begin(), sorted_vertices.end(), vertex_t{0}); + std::sort(sorted_vertices.begin(), sorted_vertices.end(), [&core_numbers](auto lhs, auto rhs) { + return core_numbers[lhs] < core_numbers[rhs]; + }); + + // update initial bin boundaries + + std::vector bin_start_offsets = {0}; + + edge_t cur_degree{0}; + for (vertex_t i = 0; i < num_vertices; ++i) { + auto degree = core_numbers[sorted_vertices[i]]; + if (degree > cur_degree) { + bin_start_offsets.insert(bin_start_offsets.end(), degree - cur_degree, i); + cur_degree = degree; + } + } + + // initialize vertex positions + + std::vector v_positions(num_vertices); + for (vertex_t i = 0; i < num_vertices; ++i) { + v_positions[sorted_vertices[i]] = i; + } + + // update core numbers + + for (vertex_t i = 0; i < num_vertices; ++i) { + auto v = sorted_vertices[i]; + if (core_numbers[v] >= k_last) { break; } + for (edge_t j = offsets[v]; j < offsets[v + 1]; ++j) { + auto nbr = indices[j]; + if (edge_valids[j] && (core_numbers[nbr] > core_numbers[v])) { + for (edge_t k = csc_offsets[nbr]; k < csc_offsets[nbr + 1]; ++k) { + if (csc_indices[k] == v) { + csc_edge_valids[k] = false; + break; + } + } + if ((degree_type == cugraph::k_core_degree_type_t::IN) || + (degree_type == cugraph::k_core_degree_type_t::INOUT)) { + auto nbr_pos = v_positions[nbr]; + auto bin_start_pos = bin_start_offsets[core_numbers[nbr]]; + std::swap(v_positions[nbr], v_positions[sorted_vertices[bin_start_pos]]); + std::swap(sorted_vertices[nbr_pos], sorted_vertices[bin_start_pos]); + ++bin_start_offsets[core_numbers[nbr]]; + --core_numbers[nbr]; + } + } + } + for (edge_t j = csc_offsets[v]; j < csc_offsets[v + 1]; ++j) { + auto nbr = csc_indices[j]; + if (csc_edge_valids[j] && (core_numbers[nbr] > core_numbers[v])) { + for (edge_t k = 
offsets[nbr]; k < offsets[nbr + 1]; ++k) { + if (indices[k] == v) { + edge_valids[k] = false; + break; + } + } + if ((degree_type == cugraph::k_core_degree_type_t::OUT) || + (degree_type == cugraph::k_core_degree_type_t::INOUT)) { + auto nbr_pos = v_positions[nbr]; + auto bin_start_pos = bin_start_offsets[core_numbers[nbr]]; + std::swap(v_positions[nbr], v_positions[sorted_vertices[bin_start_pos]]); + std::swap(sorted_vertices[nbr_pos], sorted_vertices[bin_start_pos]); + ++bin_start_offsets[core_numbers[nbr]]; + --core_numbers[nbr]; + } + } + } + } + + // clip core numbers + + std::transform( + core_numbers.begin(), core_numbers.end(), core_numbers.begin(), [k_first, k_last](auto c) { + if (c < k_first) { + return edge_t{0}; + } else { + return c; + } + }); + + return core_numbers; +} + +struct CoreNumber_Usecase { + cugraph::k_core_degree_type_t degree_type{cugraph::k_core_degree_type_t::OUT}; + size_t k_first{0}; // vertices that does not belong to k_first cores will have core numbers of 0 + size_t k_last{std::numeric_limits::max()}; // vertices that belong (k_last + 1)-core will + // have core numbers of k_last + + bool check_correctness{true}; +}; + +template +class Tests_CoreNumber + : public ::testing::TestWithParam> { + public: + Tests_CoreNumber() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(CoreNumber_Usecase const& core_number_usecase, + input_usecase_t const& input_usecase) + { + constexpr bool renumber = true; + + using weight_t = float; + + raft::handle_t handle{}; + HighResClock hr_clock{}; + + if (cugraph::test::g_perf) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } + + auto [graph, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, false, renumber, true, true); + + if (cugraph::test::g_perf) { + CUDA_TRY(cudaDeviceSynchronize()); // for 
consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + auto graph_view = graph.view(); + + ASSERT_TRUE(core_number_usecase.k_first <= core_number_usecase.k_last) + << "Invalid pair of (k_first, k_last)."; + + rmm::device_uvector d_core_numbers(graph_view.get_number_of_vertices(), + handle.get_stream()); + + if (cugraph::test::g_perf) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } + + cugraph::core_number(handle, + graph_view, + d_core_numbers.data(), + core_number_usecase.degree_type, + core_number_usecase.k_first, + core_number_usecase.k_last); + + if (cugraph::test::g_perf) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "Core Number took " << elapsed_time * 1e-6 << " s.\n"; + } + + if (core_number_usecase.check_correctness) { + cugraph::graph_t unrenumbered_graph(handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + cugraph::test::construct_graph( + handle, input_usecase, true, false, true, true); + } + auto unrenumbered_graph_view = renumber ? 
unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.get_matrix_partition_view().get_offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.get_matrix_partition_view().get_indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto h_reference_core_numbers = core_number_reference(h_offsets.data(), + h_indices.data(), + graph_view.get_number_of_vertices(), + core_number_usecase.degree_type, + core_number_usecase.k_first, + core_number_usecase.k_last); + + std::vector h_cugraph_core_numbers(graph_view.get_number_of_vertices()); + if (renumber) { + rmm::device_uvector d_unrenumbered_core_numbers(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_core_numbers) = + cugraph::test::sort_by_key(handle, *d_renumber_map_labels, d_core_numbers); + raft::update_host(h_cugraph_core_numbers.data(), + d_unrenumbered_core_numbers.data(), + d_unrenumbered_core_numbers.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + } else { + raft::update_host(h_cugraph_core_numbers.data(), + d_core_numbers.data(), + d_core_numbers.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + } + + ASSERT_TRUE(std::equal(h_reference_core_numbers.begin(), + h_reference_core_numbers.end(), + h_cugraph_core_numbers.begin())) + << "core numbers do not match with the reference values."; + } + } +}; + +using Tests_CoreNumber_File = Tests_CoreNumber; +using Tests_CoreNumber_Rmat = Tests_CoreNumber; + +TEST_P(Tests_CoreNumber_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + 
+TEST_P(Tests_CoreNumber_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_CoreNumber_Rmat, CheckInt32Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_CoreNumber_Rmat, CheckInt64Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_CoreNumber_File, + ::testing::Combine( + // enable correctness checks + testing::Values( + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::IN, size_t{0}, std::numeric_limits::max()}, + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::OUT, size_t{0}, std::numeric_limits::max()}, + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::INOUT, size_t{0}, std::numeric_limits::max()}, + CoreNumber_Usecase{cugraph::k_core_degree_type_t::IN, size_t{2}, size_t{2}}, + CoreNumber_Usecase{cugraph::k_core_degree_type_t::OUT, size_t{1}, size_t{3}}, + CoreNumber_Usecase{cugraph::k_core_degree_type_t::INOUT, size_t{2}, size_t{4}}), + testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/polbooks.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_CoreNumber_Rmat, + ::testing::Combine( + // enable correctness checks + testing::Values( + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::IN, size_t{0}, std::numeric_limits::max()}, + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::OUT, size_t{0}, std::numeric_limits::max()}, + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::INOUT, size_t{0}, std::numeric_limits::max()}), + testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); 
+ +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_CoreNumber_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + testing::Values(CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::OUT, size_t{0}, std::numeric_limits::max(), false}), + testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/cores/mg_core_number_test.cpp b/cpp/tests/cores/mg_core_number_test.cpp new file mode 100644 index 00000000000..78efd735781 --- /dev/null +++ b/cpp/tests/cores/mg_core_number_test.cpp @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +struct CoreNumber_Usecase { + cugraph::k_core_degree_type_t degree_type{cugraph::k_core_degree_type_t::OUT}; + size_t k_first{0}; // vertices that does not belong to k_first cores will have core numbers of 0 + size_t k_last{std::numeric_limits::max()}; // vertices that belong (k_last + 1)-core will + // have core numbers of k_last + + bool check_correctness{true}; +}; + +template +class Tests_MGCoreNumber + : public ::testing::TestWithParam> { + public: + Tests_MGCoreNumber() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running CoreNumber on multiple GPUs to that of a single-GPU run + template + void run_current_test(CoreNumber_Usecase const& core_number_usecase, + input_usecase_t const& input_usecase) + { + using weight_t = float; + + // 1. initialize handle + + raft::handle_t handle{}; + HighResClock hr_clock{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { + --row_comm_size; + } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2. 
create MG graph + + if (cugraph::test::g_perf) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + + auto [mg_graph, d_mg_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, false, true, true, true); + + if (cugraph::test::g_perf) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto mg_graph_view = mg_graph.view(); + + // 3. run MG CoreNumber + + rmm::device_uvector d_mg_core_numbers(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + if (cugraph::test::g_perf) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + + cugraph::core_number(handle, + mg_graph_view, + d_mg_core_numbers.data(), + core_number_usecase.degree_type, + core_number_usecase.k_first, + core_number_usecase.k_last); + + if (cugraph::test::g_perf) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG Core Number took " << elapsed_time * 1e-6 << " s.\n"; + } + + // 5. compare SG & MG results + + if (core_number_usecase.check_correctness) { + // 5-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, (*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()); + auto d_mg_aggregate_core_numbers = + cugraph::test::device_gatherv(handle, d_mg_core_numbers.data(), d_mg_core_numbers.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 5-2.
unrenumber MG results + + std::tie(std::ignore, d_mg_aggregate_core_numbers) = cugraph::test::sort_by_key( + handle, d_mg_aggregate_renumber_map_labels, d_mg_aggregate_core_numbers); + + // 5-3. create SG graph + + cugraph::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + cugraph::test::construct_graph( + handle, input_usecase, false, false, true, true); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_EQ(mg_graph_view.get_number_of_vertices(), sg_graph_view.get_number_of_vertices()); + + // 5-4. run SG CoreNumber + + rmm::device_uvector d_sg_core_numbers(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::core_number(handle, + sg_graph_view, + d_sg_core_numbers.data(), + core_number_usecase.degree_type, + core_number_usecase.k_first, + core_number_usecase.k_last); + + // 5-5. compare + + std::vector h_mg_aggregate_core_numbers(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_core_numbers.data(), + d_mg_aggregate_core_numbers.data(), + d_mg_aggregate_core_numbers.size(), + handle.get_stream()); + + std::vector h_sg_core_numbers(sg_graph_view.get_number_of_vertices()); + raft::update_host(h_sg_core_numbers.data(), + d_sg_core_numbers.data(), + d_sg_core_numbers.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + ASSERT_TRUE(std::equal(h_mg_aggregate_core_numbers.begin(), + h_mg_aggregate_core_numbers.end(), + h_sg_core_numbers.begin())); + } + } + } +}; + +using Tests_MGCoreNumber_File = Tests_MGCoreNumber; +using Tests_MGCoreNumber_Rmat = Tests_MGCoreNumber; + +TEST_P(Tests_MGCoreNumber_File, CheckInt32Int32FloatFloat) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGCoreNumber_Rmat, CheckInt32Int32FloatFloat) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGCoreNumber_Rmat,
CheckInt32Int64FloatFloat) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGCoreNumber_Rmat, CheckInt64Int64FloatFloat) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_tests, + Tests_MGCoreNumber_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values( + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::IN, size_t{0}, std::numeric_limits::max()}, + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::OUT, size_t{0}, std::numeric_limits::max()}, + CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::INOUT, size_t{0}, std::numeric_limits::max()}, + CoreNumber_Usecase{cugraph::k_core_degree_type_t::IN, size_t{2}, size_t{2}}, + CoreNumber_Usecase{cugraph::k_core_degree_type_t::OUT, size_t{1}, size_t{3}}, + CoreNumber_Usecase{cugraph::k_core_degree_type_t::INOUT, size_t{2}, size_t{4}}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/polbooks.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGCoreNumber_Rmat, + ::testing::Combine(::testing::Values(CoreNumber_Usecase{cugraph::k_core_degree_type_t::IN, + size_t{0}, + std::numeric_limits::max()}, + CoreNumber_Usecase{cugraph::k_core_degree_type_t::OUT, + size_t{0}, + std::numeric_limits::max()}, + CoreNumber_Usecase{cugraph::k_core_degree_type_t::INOUT, + size_t{0}, + std::numeric_limits::max()}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, true, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & 
edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGCoreNumber_Rmat, + ::testing::Combine( + ::testing::Values(CoreNumber_Usecase{ + cugraph::k_core_degree_type_t::OUT, size_t{0}, std::numeric_limits::max(), false}), + ::testing::Values( + cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false, 0, true)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index e40db784d09..38df6c7dbda 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -244,9 +244,9 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { translate(handle, src_v, dst_v); if (undirected_) - std::tie(src_v, dst_v, std::ignore) = + std::tie(src_v, dst_v, weights_v) = cugraph::symmetrize_edgelist_from_triangular( - handle, std::move(src_v), std::move(dst_v), std::nullopt); + handle, std::move(src_v), std::move(dst_v), std::move(weights_v)); if (multi_gpu) { std::tie(store_transposed ? dst_v : src_v, store_transposed ? src_v : dst_v, weights_v) = diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu index 55223a0bf68..801dbc7a0f0 100644 --- a/cpp/tests/utilities/thrust_wrapper.cu +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -84,6 +84,11 @@ template std::tuple, rmm::device_uvector> rmm::device_uvector const& keys, rmm::device_uvector const& values); +template std::tuple, rmm::device_uvector> sort_by_key( + raft::handle_t const& handle, + rmm::device_uvector const& keys, + rmm::device_uvector const& values); + template std::tuple, rmm::device_uvector> sort_by_key( raft::handle_t const& handle, rmm::device_uvector const& keys,