diff --git a/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 03dd160509a..19966783719 100644 --- a/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/include/cugraph/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -234,6 +234,11 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( VertexIterator map_unique_key_first, VertexIterator map_unique_key_last, ValueIterator map_value_first, +#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map in + // collect_values_for_unique_keys, need to compare the two approaches + typename thrust::iterator_traits::value_type invalid_key, + typename thrust::iterator_traits::value_type invalid_value, +#endif KeyAggregatedEdgeOp key_aggregated_e_op, T init, ReduceOp reduce_op, @@ -313,8 +318,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( static_cast(thrust::distance(map_unique_key_first, map_unique_key_last)) / load_factor), static_cast(thrust::distance(map_unique_key_first, map_unique_key_last)) + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, handle.get_stream()); @@ -586,8 +591,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto multi_gpu_kv_map_ptr = std::make_unique< cuco::static_map>( size_t{0}, - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, handle.get_stream()); // relevant only when GraphViewType::is_multi_gpu is true if constexpr (GraphViewType::is_multi_gpu) { @@ -624,8 +629,8 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_minor_keys.size()) / load_factor), static_cast(unique_minor_keys.size()) + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, handle.get_stream()); diff --git a/cpp/include/cugraph/utilities/collect_comm.cuh b/cpp/include/cugraph/utilities/collect_comm.cuh index fc448aa36b0..a72d44b6aee 100644 --- a/cpp/include/cugraph/utilities/collect_comm.cuh +++ b/cpp/include/cugraph/utilities/collect_comm.cuh @@ -61,6 +61,11 @@ collect_values_for_keys(raft::comms::comms_t const& comm, VertexIterator1 collect_key_first, VertexIterator1 collect_key_last, KeyToGPUIdOp key_to_gpu_id_op, +#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map, need to + // compare the two approaches + typename thrust::iterator_traits::value_type invalid_key, + typename thrust::iterator_traits::value_type invalid_value, +#endif rmm::cuda_stream_view stream_view) { using vertex_t = typename thrust::iterator_traits::value_type; @@ -84,8 +89,8 @@ collect_values_for_keys(raft::comms::comms_t const& comm, std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, stream_view); { @@ -147,8 +152,8 @@ collect_values_for_keys(raft::comms::comms_t const& comm, // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_keys.size()) / load_factor), unique_keys.size() + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, stream_view); { @@ -224,6 +229,11 @@ collect_values_for_unique_keys( rmm::device_uvector::value_type>&& collect_unique_keys, KeyToGPUIdOp key_to_gpu_id_op, +#if 1 // FIXME: this is unnecessary if we use a binary tree instead of cuco::static_map, need to + // compare the two approaches + typename thrust::iterator_traits::value_type invalid_key, + typename thrust::iterator_traits::value_type invalid_value, +#endif rmm::cuda_stream_view stream_view) { using vertex_t = typename thrust::iterator_traits::value_type; @@ -244,8 +254,8 @@ collect_values_for_unique_keys( std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), - cuco::sentinel::empty_key{invalid_vertex_id::value}, - cuco::sentinel::empty_value{0}, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, stream_adapter, stream_view); { diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index e0c9a42888e..e52a3703f18 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -446,14 +446,17 @@ class Louvain { cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ handle_.get_comms().get_size()}; - vertex_cluster_weights_v = cugraph::collect_values_for_keys(handle_.get_comms(), - cluster_keys_v_.begin(), - cluster_keys_v_.end(), - cluster_weights_v_.data(), - next_clusters_v_.begin(), - next_clusters_v_.end(), - vertex_to_gpu_id_op, - handle_.get_stream()); + vertex_cluster_weights_v = + cugraph::collect_values_for_keys(handle_.get_comms(), + cluster_keys_v_.begin(), + cluster_keys_v_.end(), + cluster_weights_v_.data(), + next_clusters_v_.begin(), + next_clusters_v_.end(), + vertex_to_gpu_id_op, + invalid_vertex_id::value, + std::numeric_limits::max(), + handle_.get_stream()); src_cluster_weights = edge_partition_src_property_t(handle_, current_graph_view_); @@ -533,6 +536,8 @@ class Louvain { cluster_keys_v_.begin(), cluster_keys_v_.end(), cluster_weights_v_.begin(), + invalid_vertex_id::value, + std::numeric_limits::max(), detail::key_aggregated_edge_op_t{total_edge_weight, resolution}, thrust::make_tuple(vertex_t{-1}, weight_t{0}), detail::reduce_op_t{}, diff --git a/cpp/src/structure/renumber_utils_impl.cuh b/cpp/src/structure/renumber_utils_impl.cuh index d4e248d9674..8204584b2a0 100644 --- a/cpp/src/structure/renumber_utils_impl.cuh +++ b/cpp/src/structure/renumber_utils_impl.cuh @@ -410,6 +410,8 @@ void renumber_ext_vertices(raft::handle_t const& handle, thrust::make_counting_iterator(local_int_vertex_first), std::move(sorted_unique_ext_vertices), detail::compute_gpu_id_from_ext_vertex_t{comm_size}, + invalid_vertex_id::value, + invalid_vertex_id::value, handle.get_stream()); renumber_map_ptr.reset();