Biased sampling primitive #4430

Merged 17 commits on May 24, 2024
Changes from all commits
70 changes: 36 additions & 34 deletions cpp/include/cugraph/edge_partition_device_view.cuh
@@ -214,9 +214,9 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MajorIterator>
size_t compute_number_of_edges(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
__host__ size_t compute_number_of_edges(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
return dcs_nzd_vertices_ ? thrust::transform_reduce(
rmm::exec_policy(stream),
@@ -250,7 +250,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
thrust::plus<size_t>());
}

rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
__host__ rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
if (dcs_nzd_vertices_) {
@@ -277,9 +277,9 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MajorIterator>
rmm::device_uvector<edge_t> compute_local_degrees(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
__host__ rmm::device_uvector<edge_t> compute_local_degrees(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
if (dcs_nzd_vertices_) {
@@ -306,10 +306,10 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MaskIterator, typename MajorIterator>
size_t compute_number_of_edges_with_mask(MaskIterator mask_first,
MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
__host__ size_t compute_number_of_edges_with_mask(MaskIterator mask_first,
MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
return dcs_nzd_vertices_ ? thrust::transform_reduce(
rmm::exec_policy(stream),
@@ -348,8 +348,8 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MaskIterator>
rmm::device_uvector<edge_t> compute_local_degrees_with_mask(MaskIterator mask_first,
rmm::cuda_stream_view stream) const
__host__ rmm::device_uvector<edge_t> compute_local_degrees_with_mask(
MaskIterator mask_first, rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
if (dcs_nzd_vertices_) {
@@ -384,10 +384,11 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MaskIterator, typename MajorIterator>
rmm::device_uvector<edge_t> compute_local_degrees_with_mask(MaskIterator mask_first,
MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
__host__ rmm::device_uvector<edge_t> compute_local_degrees_with_mask(
MaskIterator mask_first,
MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
if (dcs_nzd_vertices_) {
@@ -553,9 +554,9 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MajorIterator>
size_t compute_number_of_edges(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
__host__ size_t compute_number_of_edges(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
return thrust::transform_reduce(
rmm::exec_policy(stream),
@@ -573,7 +574,7 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
thrust::plus<size_t>());
}

rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
__host__ rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
thrust::transform(rmm::exec_policy(stream),
@@ -589,9 +590,9 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MajorIterator>
rmm::device_uvector<edge_t> compute_local_degrees(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
__host__ rmm::device_uvector<edge_t> compute_local_degrees(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
thrust::transform(rmm::exec_policy(stream),
@@ -607,10 +608,10 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MaskIterator, typename MajorIterator>
size_t compute_number_of_edges_with_mask(MaskIterator mask_first,
MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
__host__ size_t compute_number_of_edges_with_mask(MaskIterator mask_first,
MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
return thrust::transform_reduce(
rmm::exec_policy(stream),
@@ -632,8 +633,8 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MaskIterator>
rmm::device_uvector<edge_t> compute_local_degrees_with_mask(MaskIterator mask_first,
rmm::cuda_stream_view stream) const
__host__ rmm::device_uvector<edge_t> compute_local_degrees_with_mask(
MaskIterator mask_first, rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
thrust::transform(
@@ -651,10 +652,11 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
}

template <typename MaskIterator, typename MajorIterator>
rmm::device_uvector<edge_t> compute_local_degrees_with_mask(MaskIterator mask_first,
MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
__host__ rmm::device_uvector<edge_t> compute_local_degrees_with_mask(
MaskIterator mask_first,
MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
thrust::transform(
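The only functional change in edge_partition_device_view.cuh is tagging the host-side helpers (compute_number_of_edges, compute_local_degrees, and their _with_mask variants) as __host__. Below is a minimal sketch of the pattern using a hypothetical view type, not the PR's class (example_degree_view_t, degrees_, and sum_degrees are illustrative names): a device view that mixes a __device__ accessor with a host-only helper dispatching thrust work on a stream, with the execution space of each member spelled out.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/reduce.h>

// Hypothetical, simplified view type for illustration only.
struct example_degree_view_t {
  int const* degrees_{nullptr};
  int size_{0};

  // callable from device code, e.g. inside a thrust functor or a kernel
  __device__ int degree(int i) const { return degrees_[i]; }

  // host-only helper: it launches a thrust reduction on the given stream, so its
  // execution space is spelled out as __host__ rather than being left unqualified
  __host__ int sum_degrees(rmm::cuda_stream_view stream) const
  {
    return thrust::reduce(rmm::exec_policy(stream), degrees_, degrees_ + size_, 0);
  }
};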
62 changes: 55 additions & 7 deletions cpp/include/cugraph/utilities/thrust_tuple_utils.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@

#include <rmm/device_uvector.hpp>

#include <thrust/iterator/iterator_traits.h>
#include <thrust/tuple.h>

#include <array>
@@ -30,7 +31,7 @@ template <typename TupleType, size_t I, size_t N>
struct is_thrust_tuple_of_arithemetic_impl {
constexpr bool evaluate() const
{
if (!std::is_arithmetic<typename thrust::tuple_element<I, TupleType>::type>::value) {
if (!std::is_arithmetic_v<typename thrust::tuple_element<I, TupleType>::type>) {
return false;
} else {
return is_thrust_tuple_of_arithemetic_impl<TupleType, I + 1, N>().evaluate();
@@ -123,19 +124,19 @@ struct is_arithmetic_vector : std::false_type {};

template <template <typename> typename Vector, typename T>
struct is_arithmetic_vector<Vector<T>, Vector>
: std::integral_constant<bool, std::is_arithmetic<T>::value> {};
: std::integral_constant<bool, std::is_arithmetic_v<T>> {};

template <typename T>
struct is_std_tuple_of_arithmetic_vectors : std::false_type {};

template <typename... Ts>
struct is_std_tuple_of_arithmetic_vectors<std::tuple<rmm::device_uvector<Ts>...>> {
static constexpr bool value = (... && std::is_arithmetic<Ts>::value);
static constexpr bool value = (... && std::is_arithmetic_v<Ts>);
};

template <typename T>
struct is_arithmetic_or_thrust_tuple_of_arithmetic
: std::integral_constant<bool, std::is_arithmetic<T>::value> {};
: std::integral_constant<bool, std::is_arithmetic_v<T>> {};

template <typename... Ts>
struct is_arithmetic_or_thrust_tuple_of_arithmetic<thrust::tuple<Ts...>>
@@ -196,8 +197,8 @@ auto to_thrust_tuple(thrust::tuple<Ts...> tuple_value)
}

template <typename Iterator,
typename std::enable_if_t<std::is_arithmetic<
typename std::iterator_traits<Iterator>::value_type>::value>* = nullptr>
typename std::enable_if_t<
std::is_arithmetic_v<typename std::iterator_traits<Iterator>::value_type>>* = nullptr>
auto to_thrust_iterator_tuple(Iterator iter)
{
return thrust::make_tuple(iter);
@@ -211,6 +212,53 @@ auto to_thrust_iterator_tuple(Iterator iter)
return iter.get_iterator_tuple();
}

template <typename T, size_t I, typename std::enable_if_t<std::is_arithmetic_v<T>>* = nullptr>
#ifdef __CUDACC__
__host__ __device__
#endif
auto
thrust_tuple_get_or_identity(T val)
{
return val;
}

template <typename T,
size_t I,
typename std::enable_if_t<is_thrust_tuple_of_arithmetic<T>::value>* = nullptr>
#ifdef __CUDACC__
__host__ __device__
#endif
auto
thrust_tuple_get_or_identity(T val)
{
return thrust::get<I>(val);
}

template <typename Iterator,
size_t I,
typename std::enable_if_t<std::is_arithmetic_v<
typename thrust::iterator_traits<Iterator>::value_type>>* = nullptr>
#ifdef __CUDACC__
__host__ __device__
#endif
auto
thrust_tuple_get_or_identity(Iterator val)
{
return val;
}

template <typename Iterator,
size_t I,
typename std::enable_if_t<is_thrust_tuple_of_arithmetic<
typename thrust::iterator_traits<Iterator>::value_type>::value>* = nullptr>
#ifdef __CUDACC__
__host__ __device__
#endif
auto
thrust_tuple_get_or_identity(Iterator val)
{
return thrust::get<I>(val.get_iterator_tuple());
}
// a temporary function to emulate thrust::tuple_cat (not supported) using std::tuple_cat (should
// retire once thrust::tuple is replaced with cuda::std::tuple)
template <typename... TupleTypes>
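The block added at @@ -211,6 +212,53 @@ introduces thrust_tuple_get_or_identity<T, I>: for a plain arithmetic key it returns the key unchanged, for a thrust::tuple of arithmetic types it returns element I, and the two iterator overloads do the same at the iterator level. A minimal usage sketch of the value overloads follows, assuming the function lives in namespace cugraph like the rest of this header; the key values are made up.

#include <cugraph/utilities/thrust_tuple_utils.hpp>

#include <thrust/tuple.h>

#include <cassert>
#include <cstdint>

int main()
{
  // arithmetic key: the identity overload returns the key unchanged
  int32_t plain_key = 7;
  auto v0 = cugraph::thrust_tuple_get_or_identity<int32_t, 0>(plain_key);
  assert(v0 == 7);

  // (vertex, weight)-style tuple key: the tuple overload returns element I
  auto tuple_key = thrust::make_tuple(int32_t{7}, 0.25f);
  auto v1 = cugraph::thrust_tuple_get_or_identity<decltype(tuple_key), 0>(tuple_key);
  assert(v1 == 7);

  return 0;
}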
3 changes: 2 additions & 1 deletion cpp/include/cugraph/vertex_partition_device_view.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
#pragma once

#include <cugraph/utilities/error.hpp>
#include <cugraph/vertex_partition_view.hpp>

#include <type_traits>

57 changes: 16 additions & 41 deletions cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
@@ -188,13 +188,8 @@ __global__ static void extract_transform_v_frontier_e_hypersparse_or_low_degree(

edge_t local_degree{0};
if (lane_id < static_cast<int32_t>(max_key_idx - min_key_idx)) {
auto key = *(key_first + idx);
vertex_t major{};
if constexpr (std::is_same_v<key_t, vertex_t>) {
major = key;
} else {
major = thrust::get<0>(key);
}
auto key = *(key_first + idx);
auto major = thrust_tuple_get_or_identity<key_t, 0>(key);
if constexpr (hypersparse) {
auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major);
if (major_hypersparse_idx) {
@@ -333,13 +328,8 @@ __global__ static void extract_transform_v_frontier_e_mid_degree(
cuda::atomic_ref<size_t, cuda::thread_scope_device> buffer_idx(*buffer_idx_ptr);

while (idx < static_cast<size_t>(thrust::distance(key_first, key_last))) {
auto key = *(key_first + idx);
vertex_t major{};
if constexpr (std::is_same_v<key_t, vertex_t>) {
major = key;
} else {
major = thrust::get<0>(key);
}
auto key = *(key_first + idx);
auto major = thrust_tuple_get_or_identity<key_t, 0>(key);
auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
vertex_t const* indices{nullptr};
edge_t local_edge_offset{};
@@ -432,13 +422,8 @@ __global__ static void extract_transform_v_frontier_e_high_degree(
cuda::atomic_ref<size_t, cuda::thread_scope_device> buffer_idx(*buffer_idx_ptr);

while (idx < static_cast<size_t>(thrust::distance(key_first, key_last))) {
auto key = *(key_first + idx);
vertex_t major{};
if constexpr (std::is_same_v<key_t, vertex_t>) {
major = key;
} else {
major = thrust::get<0>(key);
}
auto key = *(key_first + idx);
auto major = thrust_tuple_get_or_identity<key_t, 0>(key);
auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
vertex_t const* indices{nullptr};
edge_t local_edge_offset{};
@@ -561,15 +546,10 @@ extract_transform_v_frontier_e(raft::handle_t const& handle,
thrust::optional<output_value_t>>>>);

if (do_expensive_check) {
vertex_t const* frontier_vertex_first{nullptr};
vertex_t const* frontier_vertex_last{nullptr};
if constexpr (std::is_same_v<key_t, vertex_t>) {
frontier_vertex_first = frontier.begin();
frontier_vertex_last = frontier.end();
} else {
frontier_vertex_first = thrust::get<0>(frontier.begin().get_iterator_tuple());
frontier_vertex_last = thrust::get<0>(frontier.end().get_iterator_tuple());
}
auto frontier_vertex_first =
thrust_tuple_get_or_identity<decltype(frontier.begin()), 0>(frontier.begin());
auto frontier_vertex_last =
thrust_tuple_get_or_identity<decltype(frontier.end()), 0>(frontier.end());
auto num_invalid_keys =
frontier.size() -
thrust::count_if(handle.get_thrust_policy(),
@@ -659,17 +639,12 @@ extract_transform_v_frontier_e(raft::handle_t const& handle,
get_dataframe_buffer_end(edge_partition_frontier_key_buffer);
}

vertex_t const* edge_partition_frontier_major_first{nullptr};
vertex_t const* edge_partition_frontier_major_last{nullptr};
if constexpr (std::is_same_v<key_t, vertex_t>) {
edge_partition_frontier_major_first = edge_partition_frontier_key_first;
edge_partition_frontier_major_last = edge_partition_frontier_key_last;
} else {
edge_partition_frontier_major_first =
thrust::get<0>(edge_partition_frontier_key_first.get_iterator_tuple());
edge_partition_frontier_major_last =
thrust::get<0>(edge_partition_frontier_key_last.get_iterator_tuple());
}
auto edge_partition_frontier_major_first =
thrust_tuple_get_or_identity<decltype(edge_partition_frontier_key_first), 0>(
edge_partition_frontier_key_first);
auto edge_partition_frontier_major_last =
thrust_tuple_get_or_identity<decltype(edge_partition_frontier_key_last), 0>(
edge_partition_frontier_key_last);

auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i);
auto max_pushes = edge_partition.compute_number_of_edges(
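The hunks in extract_transform_v_frontier_e.cuh replace the repeated `if constexpr (std::is_same_v<key_t, vertex_t>)` branching with calls to the new utility. A short sketch, with hypothetical iterator types rather than the PR's frontier classes, of how the iterator overloads pass a plain vertex pointer through unchanged and peel the vertex-ID component out of a zip iterator:

#include <cugraph/utilities/thrust_tuple_utils.hpp>

#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>

#include <cstdint>

using vertex_t = int32_t;

// For a frontier keyed by vertex IDs alone, KeyIterator is vertex_t const* and the call
// is the identity; for a (vertex, tag) frontier it is a zip iterator and the call
// returns the component iterator over the vertex IDs.
template <typename KeyIterator>
auto major_vertex_first(KeyIterator key_first)
{
  return cugraph::thrust_tuple_get_or_identity<KeyIterator, 0>(key_first);
}

int main()
{
  vertex_t vertices[3] = {0, 5, 9};
  int32_t tags[3]      = {1, 1, 2};

  vertex_t const* plain_first = vertices;
  auto zip_first = thrust::make_zip_iterator(thrust::make_tuple(vertices, tags));

  auto p = major_vertex_first(plain_first);  // same pointer back
  auto z = major_vertex_first(zip_first);    // vertex_t* over vertices

  return (*p == 0 && *z == 0) ? 0 : 1;
}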