Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MNMG ECG #4030

Merged
merged 42 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
1a29c04
Add scaffolding for ecg implementation
Nov 29, 2023
aed75f4
Remove unused header files
Nov 29, 2023
563015a
first version of MNMG ecg implementation
Dec 1, 2023
64b13ab
Merge branch 'branch-24.02' of github.com:rapidsai/cugraph into ecg
Dec 1, 2023
013e1d7
First version of MNMG ecg implementation
Dec 1, 2023
24fe91d
Run ensample leiden for only one level
Dec 1, 2023
4ee5f4b
Run ensample leiden for only one level
Dec 1, 2023
b2f458a
MNMG ECG with Louvain
Dec 1, 2023
276f42c
Random assignment of initial cluster ids
Dec 4, 2023
bb09206
Randomize initial cluster assignments
Dec 6, 2023
6ce0b61
Add rng state to Louvain
Dec 6, 2023
909c710
Merge branch 'branch-24.02' of github.com:rapidsai/cugraph into ecg
Dec 6, 2023
baee0a9
Remove debugging code
Dec 6, 2023
103a6be
Add MG Ecg tests
Dec 6, 2023
0485a18
Add MG ECG tests
Dec 6, 2023
e2ab79f
Add copyright
Dec 6, 2023
93800b7
Remove debug code
Dec 6, 2023
6fa8ac7
Merge branch 'branch-24.02' of github.com:rapidsai/cugraph into ecg
Dec 6, 2023
bd28dc9
Add missing random shuffling
Dec 6, 2023
867093c
Fix vector size
Dec 8, 2023
d0b37dc
Use device_allgatherv instead of P2P to distribute extra numbers
Dec 8, 2023
8d7881b
Fix SG code branching
Dec 9, 2023
c201309
Compute final modularity based on original edge weights
Dec 12, 2023
8fc6be0
Address PR comments
Dec 14, 2023
741e715
Update doc-strings
Dec 24, 2023
e618aca
Update doc-strings
Dec 24, 2023
ec4ec86
Merge branch 'branch-24.02' of github.com:rapidsai/cugraph into ecg_m…
Dec 24, 2023
ec05f3f
skip calling .view() on empty graph object
Dec 25, 2023
fab6f35
Address PR comments
Jan 5, 2024
df7f25f
Address PR comments
Jan 5, 2024
843ea42
Add utility function to permute a (distributed) range
Jan 6, 2024
81e18e5
Add utility function to permute a (distributed) range
Jan 6, 2024
c8ad2d5
Assign a radom rank to each vertex id/number
Jan 6, 2024
4da1c9a
style fix
Jan 6, 2024
94faf51
Merge branch 'branch-24.02' of github.com:rapidsai/cugraph into ecg
Jan 6, 2024
b89dc11
Upate doc string
Jan 8, 2024
50c852a
fix typos, refactor code SG/MG code path
Jan 9, 2024
79b06fa
Change parameter order, add do_expensive_check falg parameter
Jan 9, 2024
194a20a
Merge branch 'branch-24.02' of github.com:rapidsai/cugraph into ecg
Jan 9, 2024
06f385f
Add missing semicolon
Jan 9, 2024
1579120
Merge branch 'branch-24.02' of github.com:rapidsai/cugraph into ecg
Jan 10, 2024
6d53896
Update ecg test
Jan 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -187,6 +187,7 @@ endif()

set(CUGRAPH_SOURCES
src/detail/shuffle_vertices.cu
src/detail/permute_range.cu
src/detail/shuffle_vertex_pairs.cu
src/detail/collect_local_vertex_values.cu
src/detail/groupby_and_count.cu
Expand Down Expand Up @@ -218,6 +219,8 @@ set(CUGRAPH_SOURCES
src/community/louvain_mg.cu
src/community/leiden_sg.cu
src/community/leiden_mg.cu
src/community/ecg_sg.cu
src/community/ecg_mg.cu
src/community/legacy/louvain.cu
src/community/legacy/ktruss.cu
src/community/legacy/ecg.cu
Expand Down
126 changes: 95 additions & 31 deletions cpp/include/cugraph/algorithms.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -541,30 +541,37 @@ weight_t hungarian(raft::handle_t const& handle,
* community hierarchies in large networks, J Stat Mech P10008 (2008),
* http://arxiv.org/abs/0803.0476
*
* @throws cugraph::logic_error when an error occurs.
*
* @tparam graph_view_t Type of graph
* @throws cugraph::logic_error when an error occurs.
*
* @param[in] handle Library handle (RAFT). If a communicator is set in the handle,
* @param[in] graph input graph object
* @param[out] clustering Pointer to device array where the clustering should be stored
* @param[in] max_level (optional) maximum number of levels to run (default 100)
* @param[in] threshold (optional) threshold for convergence at each level (default
* 1e-7)
* @param[in] resolution (optional) The value of the resolution parameter to use.
* Called gamma in the modularity formula, this changes the size
* of the communities. Higher resolutions lead to more smaller
* communities, lower resolutions lead to fewer larger
* communities. (default 1)
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
*
* @return a pair containing:
* 1) number of levels of the returned clustering
* 2) modularity of the returned clustering
* @param[in] handle Library handle (RAFT). If a communicator is set in the handle,
* @param[in] rng_state The RngState instance holding pseudo-random number generator state.
* @param[in] graph_view Input graph view object.
* @param[in] edge_weight_view Optional view object holding edge weights for @p graph_view.
* If @p edge_weight_view.has_value() == false, edge weights
* are assumed to be 1.0.
* @param[out] clustering Pointer to device array where the clustering should be stored
* @param[in] max_level (optional) maximum number of levels to run (default 100)
* @param[in] threshold (optional) threshold for convergence at each level (default 1e-7)
* @param[in] resolution (optional) The value of the resolution parameter to use.
* Called gamma in the modularity formula, this changes the size
* of the communities. Higher resolutions lead to more smaller
* communities, lower resolutions lead to fewer larger
* communities. (default 1)
*
* @return a pair containing:
* 1) number of levels of the returned clustering
* 2) modularity of the returned clustering
*
*/
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::pair<size_t, weight_t> louvain(
raft::handle_t const& handle,
std::optional<std::reference_wrapper<raft::random::RngState>> rng_state,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
vertex_t* clustering,
Expand Down Expand Up @@ -593,25 +600,33 @@ std::pair<size_t, weight_t> louvain(
*
* @throws cugraph::logic_error when an error occurs.
*
* @tparam graph_view_t Type of graph
*
* @param[in] handle Library handle (RAFT)
* @param[in] graph_view Input graph view object
* @param[in] max_level (optional) maximum number of levels to run (default 100)
* @param[in] resolution (optional) The value of the resolution parameter to use.
* Called gamma in the modularity formula, this changes the size
* of the communities. Higher resolutions lead to more smaller
* communities, lower resolutions lead to fewer larger
* communities. (default 1)
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
*
* @return a pair containing:
* 1) unique pointer to dendrogram
* 2) modularity of the returned clustering
* @param[in] handle Library handle (RAFT). If a communicator is set in the handle,
* @param[in] rng_state The RngState instance holding pseudo-random number generator state.
* @param[in] graph_view Input graph view object.
* @param[in] edge_weight_view Optional view object holding edge weights for @p graph_view.
* If @p edge_weight_view.has_value() == false, edge weights
* are assumed to be 1.0.
* @param[in] max_level (optional) maximum number of levels to run (default 100)
* @param[in] threshold (optional) threshold for convergence at each level (default 1e-7)
* @param[in] resolution (optional) The value of the resolution parameter to use.
* Called gamma in the modularity formula, this changes the size
* of the communities. Higher resolutions lead to more smaller
* communities, lower resolutions lead to fewer larger
* communities. (default 1)
* @return a pair containing:
* 1) unique pointer to dendrogram
* 2) modularity of the returned clustering
*
*/
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> louvain(
raft::handle_t const& handle,
std::optional<std::reference_wrapper<raft::random::RngState>> rng_state,
naimnv marked this conversation as resolved.
Show resolved Hide resolved
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
size_t max_level = 100,
Expand Down Expand Up @@ -779,6 +794,55 @@ void ecg(raft::handle_t const& handle,
vertex_t ensemble_size,
vertex_t* clustering);

/**
* @brief Computes the ecg clustering of the given graph.
*
* ECG runs truncated Louvain on an ensemble of permutations of the input graph,
* then uses the ensemble partitions to determine weights for the input graph.
* The final result is found by running full Louvain on the input graph using
* the determined weights. See https://arxiv.org/abs/1809.05578 for further
* information.
*
* @throws cugraph::logic_error when an error occurs.
*
naimnv marked this conversation as resolved.
Show resolved Hide resolved
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
*
* @param[in] handle Library handle (RAFT). If a communicator is set in the handle,
* @param[in] rng_state The RngState instance holding pseudo-random number generator state.
naimnv marked this conversation as resolved.
Show resolved Hide resolved
* @param[in] graph_view Input graph view object
* @param[in] edge_weight_view View object holding edge weights for @p graph_view.
* @param[in] min_weight Minimum edge weight to use in the final call of the clustering
* algorithm if an edge does not appear in any of the ensemble runs.
* @param[in] ensemble_size The ensemble size parameter
* @param[in] max_level (optional) maximum number of levels to run (default 100)
* @param[in] threshold (optional) threshold for convergence at each level (default 1e-7)
* @param[in] resolution (optional) The value of the resolution parameter to use.
* Called gamma in the modularity formula, this changes the size
* of the communities. Higher resolutions lead to more smaller
* communities, lower resolutions lead to fewer larger
* communities. (default 1)
*
* @return a tuple containing:
* 1) Device vector containing clustering result
* 2) number of levels of the returned clustering
* 3) modularity of the returned clustering
*
*/
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>, size_t, weight_t> ecg(
raft::handle_t const& handle,
raft::random::RngState& rng_state,
naimnv marked this conversation as resolved.
Show resolved Hide resolved
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
weight_t min_weight,
size_t ensemble_size,
naimnv marked this conversation as resolved.
Show resolved Hide resolved
size_t max_level = 100,
weight_t threshold = weight_t{1e-7},
weight_t resolution = weight_t{1});

/**
* @brief Generate edges in a minimum spanning forest of an undirected weighted graph.
*
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/cugraph/detail/collect_comm_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,6 +15,7 @@
*/
#pragma once

#include <raft/core/device_span.hpp>
#include <raft/core/handle.hpp>
#include <rmm/device_uvector.hpp>

Expand Down
25 changes: 24 additions & 1 deletion cpp/include/cugraph/detail/shuffle_wrappers.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,7 @@
#pragma once

#include <raft/core/handle.hpp>
#include <raft/random/rng_state.hpp>
#include <rmm/device_uvector.hpp>

#include <optional>
Expand Down Expand Up @@ -138,6 +139,28 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning(
rmm::device_uvector<vertex_t>&& vertices,
rmm::device_uvector<value_t>&& values);

/**
* @brief Permute a range.
*
* Returns a permuted version of the range described by @p local_start and @p local_range_size,
* using @p rng_state as the source of pseudo-random numbers.
* NOTE(review): only the declaration is visible here; presumably the range is
* [local_start, local_start + local_range_size) -- confirm against the definition.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
*
* @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
* and handles to various CUDA libraries) to run graph algorithms.
* @param[in] rng_state The RngState instance holding pseudo-random number generator state.
* @param[in] local_start Start of local range assigned to this process.
* @param[in] local_range_size Size of local range assigned to this process.
* @param[in] multi_gpu (optional) Run-time flag (default false). NOTE(review): presumably selects
* the multi-GPU (distributed) code path when true -- confirm against the definition.
* @param[in] do_expensive_check (optional) Flag (default false). NOTE(review): presumably enables
* additional, potentially costly validation -- confirm against the definition.
*
* @return permuted range.
*/

template <typename vertex_t>
rmm::device_uvector<vertex_t> permute_range(raft::handle_t const& handle,
raft::random::RngState& rng_state,
vertex_t local_start,
vertex_t local_range_size,
bool multi_gpu = false,
bool do_expensive_check = false);

/**
* @brief Shuffle internal (i.e. renumbered) vertices to their local GPUs based on vertex
* partitioning.
Expand Down
27 changes: 14 additions & 13 deletions cpp/src/c_api/louvain.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -95,18 +95,19 @@ struct louvain_functor : public cugraph::c_api::abstract_functor {
// could add support in Louvain for std::nullopt as the edge weights behaving
// as desired and only instantiating a real edge_property_view_t for the
// coarsened graphs.
auto [level, modularity] =
cugraph::louvain(handle_,
graph_view,
(edge_weights != nullptr)
? std::make_optional(edge_weights->view())
: std::make_optional(cugraph::c_api::create_constant_edge_property(
handle_, graph_view, weight_t{1})
.view()),
clusters.data(),
max_level_,
static_cast<weight_t>(threshold_),
static_cast<weight_t>(resolution_));
auto [level, modularity] = cugraph::louvain(
handle_,
std::optional<std::reference_wrapper<raft::random::RngState>>{std::nullopt},
graph_view,
(edge_weights != nullptr)
? std::make_optional(edge_weights->view())
: std::make_optional(
cugraph::c_api::create_constant_edge_property(handle_, graph_view, weight_t{1})
.view()),
clusters.data(),
max_level_,
static_cast<weight_t>(threshold_),
static_cast<weight_t>(resolution_));

rmm::device_uvector<vertex_t> vertices(graph_view.local_vertex_partition_range_size(),
handle_.get_stream());
Expand Down
Loading