From 8837403c78f72a9d44414d0eb7ff43ff59569315 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Fri, 11 Feb 2022 17:36:39 -0500 Subject: [PATCH 1/8] add node2vec C API implementation --- cpp/CMakeLists.txt | 1 + cpp/include/cugraph/api_helpers.hpp | 8 + cpp/include/cugraph_c/algorithms.h | 21 ++- cpp/src/c_api/random_walks.cpp | 220 ++++++++++++++++++++++++++++ cpp/tests/c_api/node2vec_test.c | 93 +++++++++--- 5 files changed, 319 insertions(+), 24 deletions(-) create mode 100644 cpp/src/c_api/random_walks.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 62657a53479..73b1a4bf560 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -353,6 +353,7 @@ add_library(cugraph_c SHARED src/c_api/bfs.cpp src/c_api/sssp.cpp src/c_api/extract_paths.cpp + src/c_api/random_walks.cpp ) add_library(cugraph::cugraph_c ALIAS cugraph_c) diff --git a/cpp/include/cugraph/api_helpers.hpp b/cpp/include/cugraph/api_helpers.hpp index 549313abb90..16138fca589 100644 --- a/cpp/include/cugraph/api_helpers.hpp +++ b/cpp/include/cugraph/api_helpers.hpp @@ -28,6 +28,14 @@ enum class sampling_strategy_t : int { UNIFORM = 0, BIASED, NODE2VEC }; struct sampling_params_t { sampling_params_t(void) {} + sampling_params_t(sampling_strategy_t sampling_type, double p = 1.0, double q = 1.0, bool use_alpha_cache = false) + : sampling_type_(sampling_type), + p_(p), + q_(q), + use_alpha_cache_(use_alpha_cache) + { + } + sampling_params_t(int sampling_type, double p = 1.0, double q = 1.0, bool use_alpha_cache = false) : sampling_type_(static_cast(sampling_type)), p_(p), diff --git a/cpp/include/cugraph_c/algorithms.h b/cpp/include/cugraph_c/algorithms.h index 7f993fc67e3..89723a7d25b 100644 --- a/cpp/include/cugraph_c/algorithms.h +++ b/cpp/include/cugraph_c/algorithms.h @@ -328,6 +328,8 @@ typedef struct { * needs to be transposed * @param [in] sources Array of source vertices * @param [in] max_depth Maximum length of the generated path + * @param [in] compress_result If true, return 
the paths as a compressed sparse row matrix, + * otherwise return as a dense matrix * @param [in] p The return parameter * @param [in] q The in/out parameter * @param [in] result Output from the node2vec call @@ -339,7 +341,7 @@ cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* sources, size_t max_depth, - bool_t flag_use_padding, + bool_t compress_result, double p, double q, cugraph_random_walk_result_t** result, @@ -359,8 +361,8 @@ size_t cugraph_random_walk_result_get_max_path_length(cugraph_random_walk_result /** * @brief Get the matrix (row major order) of vertices in the paths * - * @param [in] result The result from extract_paths - * @return type erased array pointing to the matrix in device memory + * @param [in] result The result from a random walk algorithm + * @return type erased array pointing to the path matrix in device memory */ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_paths( cugraph_random_walk_result_t* result); @@ -368,12 +370,21 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_paths( /** * @brief Get the matrix (row major order) of edge weights in the paths * - * @param [in] result The result from extract_paths - * @return type erased array pointing to the edge weights in device memory + * @param [in] result The result from a random walk algorithm + * @return type erased array pointing to the path edge weights in device memory */ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_weights( cugraph_random_walk_result_t* result); +/** + * @brief If the random walk result is compressed, get the offsets + * + * @param [in] result The result from a random walk algorithm + * @return type erased array pointing to the edge offsets in device memory + */ +cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_offsets( + cugraph_random_walk_result_t* result); + /** * 
@brief Free random walks result * diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp new file mode 100644 index 00000000000..e0b93603b6e --- /dev/null +++ b/cpp/src/c_api/random_walks.cpp @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include +#include +#include +#include + +#include + +namespace cugraph { +namespace c_api { + +struct cugraph_random_walk_result_t { + bool result_compressed_; + size_t max_path_length_; + cugraph_type_erased_device_array_t* paths_; + cugraph_type_erased_device_array_t* weights_; + cugraph_type_erased_device_array_t* offsets_; +}; + +struct node2vec_functor : public abstract_functor { + raft::handle_t const& handle_; + cugraph_graph_t* graph_; + cugraph_type_erased_device_array_view_t const* sources_; + size_t max_depth_; + bool compress_result_; + double p_; + double q_; + cugraph_random_walk_result_t* result_{}; + + node2vec_functor(raft::handle_t const& handle, + cugraph_graph_t* graph, + cugraph_type_erased_device_array_view_t const* sources, + size_t max_depth, + bool compress_result, + double p, + double q) + : abstract_functor(), + handle_(handle), + graph_(graph), + sources_(sources), + max_depth_(max_depth), + compress_result_(compress_result), + p_(p), + q_(q) + { + } + + template + void operator()() + { + // FIXME: Think about how to handle SG vice MG + if constexpr 
(!cugraph::is_candidate::value) { + unsupported(); + } else if constexpr (multi_gpu) { + unsupported(); + } else { + // node2vec expects store_transposed == false + if constexpr (store_transposed) { + error_code_ = cugraph::c_api:: + transpose_storage( + handle_, graph_, error_.get()); + if (error_code_ != CUGRAPH_SUCCESS) return; + } + + auto graph = + reinterpret_cast*>( + graph_->graph_); + + auto graph_view = graph->view(); + + auto number_map = reinterpret_cast*>(graph_->number_map_); + + rmm::device_uvector sources(sources_->size_, handle_.get_stream()); + raft::copy( + sources.data(), sources_->as_type(), sources.size(), handle_.get_stream()); + + // + // Need to renumber sources + // + renumber_ext_vertices(handle_, + sources.data(), + sources.size(), + number_map->data(), + graph_view.get_local_vertex_first(), + graph_view.get_local_vertex_last(), + false); + + // FIXME: Forcing this to edge_t for now. What should it really be? + // Seems like it should be the smallest size that can accommodate + // max_depth_ * sources_->size_ + auto [paths, weights, offsets] = cugraph::random_walks( + handle_, + graph_view, + sources.data(), + static_cast(sources.size()), + static_cast(max_depth_), + !compress_result_, + // std::make_unique(2, p_, q_, false)); + std::make_unique(cugraph::sampling_strategy_t::NODE2VEC, p_, q_)); + + result_ = new cugraph_random_walk_result_t{ + compress_result_, + max_depth_, + new cugraph_type_erased_device_array_t(paths, graph_->vertex_type_), + new cugraph_type_erased_device_array_t(weights, graph_->weight_type_), + new cugraph_type_erased_device_array_t(offsets, graph_->vertex_type_)}; + } + } +}; + +} // namespace c_api +} // namespace cugraph + +cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* sources, + size_t max_depth, + bool_t compress_results, + double p, + double q, + cugraph_random_walk_result_t** result, + 
cugraph_error_t** error) +{ + *result = nullptr; + *error = nullptr; + + try { + auto p_handle = reinterpret_cast(handle); + auto p_graph = reinterpret_cast(graph); + auto p_sources = + reinterpret_cast(sources); + + cugraph::c_api::node2vec_functor functor( + *p_handle, p_graph, p_sources, max_depth, compress_results, p, q); + + // FIXME: This seems like a recurring pattern. Can I encapsulate + // The vertex_dispatcher and error handling calls into a reusable function? + // After all, we're in C++ here. + cugraph::dispatch::vertex_dispatcher(cugraph::c_api::dtypes_mapping[p_graph->vertex_type_], + cugraph::c_api::dtypes_mapping[p_graph->edge_type_], + cugraph::c_api::dtypes_mapping[p_graph->weight_type_], + p_graph->store_transposed_, + p_graph->multi_gpu_, + functor); + + if (functor.error_code_ != CUGRAPH_SUCCESS) { + *error = reinterpret_cast(functor.error_.release()); + return functor.error_code_; + } + + *result = reinterpret_cast(functor.result_); + } catch (std::exception const& ex) { + *error = reinterpret_cast(new cugraph::c_api::cugraph_error_t{ex.what()}); + return CUGRAPH_UNKNOWN_ERROR; + } + + return CUGRAPH_SUCCESS; +} + +size_t cugraph_random_walk_result_get_max_path_length(cugraph_random_walk_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->max_path_length_; +} + +cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_paths( + cugraph_random_walk_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return reinterpret_cast( + internal_pointer->paths_->view()); +} + +cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_weights( + cugraph_random_walk_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return reinterpret_cast( + internal_pointer->weights_->view()); +} + +cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_offsets( + cugraph_random_walk_result_t* result) +{ + auto internal_pointer = 
reinterpret_cast(result); + return reinterpret_cast( + internal_pointer->offsets_->view()); +} + +void cugraph_random_walk_result_free(cugraph_random_walk_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + delete internal_pointer->paths_; + delete internal_pointer->weights_; + delete internal_pointer; +} diff --git a/cpp/tests/c_api/node2vec_test.c b/cpp/tests/c_api/node2vec_test.c index 1b7d58bf95c..7266583c11e 100644 --- a/cpp/tests/c_api/node2vec_test.c +++ b/cpp/tests/c_api/node2vec_test.c @@ -25,37 +25,92 @@ typedef int32_t vertex_t; typedef int32_t edge_t; typedef float weight_t; +const weight_t EPSILON = 0.001; + int generic_node2vec_test(vertex_t* h_src, - vertex_t* h_dst, - weight_t* h_wgt, - vertex_t* h_seeds, + vertex_t* h_dst, + weight_t* h_wgt, + vertex_t* h_seeds, size_t num_edges, size_t num_seeds, - size_t max_depth, - bool_t store_transposed) + size_t max_depth, + double p, + double q, + bool_t store_transposed) { int test_ret_value = 0; cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; - cugraph_error_t* ret_error = NULL; + cugraph_error_t* ret_error = NULL; - cugraph_resource_handle_t* p_handle = NULL; - cugraph_graph_t* p_graph = NULL; + cugraph_resource_handle_t* p_handle = NULL; + cugraph_graph_t* p_graph = NULL; + cugraph_random_walk_result_t* p_result = NULL; + cugraph_type_erased_device_array_t* p_sources = NULL; + cugraph_type_erased_device_array_view_t* p_source_view = NULL; p_handle = cugraph_create_resource_handle(); TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); ret_code = create_test_graph( p_handle, h_src, h_dst, h_wgt, num_edges, store_transposed, &p_graph, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + ret_code = + cugraph_type_erased_device_array_create(p_handle, num_seeds, INT32, &p_sources, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "p_sources create failed."); + + p_source_view = 
cugraph_type_erased_device_array_view(p_sources); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, p_source_view, (byte_t*)h_seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + ret_code = cugraph_node2vec( + p_handle, p_graph, p_source_view, max_depth, FALSE, p, q, &p_result, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "node2vec failed failed."); + + cugraph_type_erased_device_array_view_t* paths; + cugraph_type_erased_device_array_view_t* weights; + size_t max_path_length; + + max_path_length = cugraph_random_walk_result_get_max_path_length(p_result); + paths = cugraph_random_walk_result_get_paths(p_result); + weights = cugraph_random_walk_result_get_weights(p_result); + + vertex_t h_paths[max_path_length * num_seeds]; + weight_t h_weights[max_path_length * num_seeds]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_paths, paths, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_weights, weights, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - // Populate this test. // We can easily validate that the results of node2vec // are feasible by converting the sparse (h_src,h_dst,h_wgt) // into a dense host matrix and check each path. 
- - cugraph_sg_graph_free(p_graph); - cugraph_free_resource_handle(p_handle); - cugraph_error_free(ret_error); + int num_vertices = 5; + weight_t M[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) + M[i][j] = 0.0; + + for (int i = 0; i < num_edges; ++i) + M[h_src[i]][h_dst[i]] = h_wgt[i]; + + for (int i = 0; (i < num_seeds) && (test_ret_value == 0); ++i) { + for (int j = 0; (j < (max_path_length-1)) && (test_ret_value == 0); ++j) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_weights[i * (max_path_length - 1)], + M[h_paths[i * max_path_length]][h_paths[i * max_path_length + 1]], + EPSILON), + "node2vec weights don't match"); + } + } return test_ret_value; } @@ -65,13 +120,13 @@ int test_node2vec() size_t num_edges = 8; size_t num_vertices = 6; - vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; - vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; - weight_t wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t seeds[] = {0}; - size_t max_depth = 4; + vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t seeds[] = {0, 0}; + size_t max_depth = 4; - return generic_node2vec_test(src, dst, wgt, seeds, max_depth, num_edges, 1, FALSE); + return generic_node2vec_test(src, dst, wgt, seeds, num_edges, 2, max_depth, 0.8, 0.5, FALSE); } int main(int argc, char** argv) From ba66e8e882cc7db75f15469c3e03c5ada250858b Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Fri, 11 Feb 2022 17:38:15 -0500 Subject: [PATCH 2/8] fix copyright --- cpp/src/c_api/random_walks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp index e0b93603b6e..8b10e8a35e3 100644 --- a/cpp/src/c_api/random_walks.cpp +++ b/cpp/src/c_api/random_walks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
+ * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From ab4c99df20abc0fdd62611beafefd05c8f6ea104 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Fri, 11 Feb 2022 22:04:57 -0500 Subject: [PATCH 3/8] fix clang-format issues --- cpp/include/cugraph/api_helpers.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/include/cugraph/api_helpers.hpp b/cpp/include/cugraph/api_helpers.hpp index 16138fca589..7c7dc61321c 100644 --- a/cpp/include/cugraph/api_helpers.hpp +++ b/cpp/include/cugraph/api_helpers.hpp @@ -28,11 +28,11 @@ enum class sampling_strategy_t : int { UNIFORM = 0, BIASED, NODE2VEC }; struct sampling_params_t { sampling_params_t(void) {} - sampling_params_t(sampling_strategy_t sampling_type, double p = 1.0, double q = 1.0, bool use_alpha_cache = false) - : sampling_type_(sampling_type), - p_(p), - q_(q), - use_alpha_cache_(use_alpha_cache) + sampling_params_t(sampling_strategy_t sampling_type, + double p = 1.0, + double q = 1.0, + bool use_alpha_cache = false) + : sampling_type_(sampling_type), p_(p), q_(q), use_alpha_cache_(use_alpha_cache) { } From 7595067e19fc51b9584897f7ea5792d3b65d3689 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Fri, 11 Feb 2022 22:11:24 -0500 Subject: [PATCH 4/8] fix copyright year --- cpp/include/cugraph/api_helpers.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cugraph/api_helpers.hpp b/cpp/include/cugraph/api_helpers.hpp index 7c7dc61321c..136a60453ec 100644 --- a/cpp/include/cugraph/api_helpers.hpp +++ b/cpp/include/cugraph/api_helpers.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From db88891f4095ba445949513caf92fee620dfb399 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 15 Feb 2022 20:03:43 -0500 Subject: [PATCH 5/8] add unit tests for the compress_results option --- cpp/include/cugraph_c/algorithms.h | 6 +-- cpp/src/c_api/random_walks.cpp | 11 ++-- cpp/tests/c_api/node2vec_test.c | 83 ++++++++++++++++++++++++++---- 3 files changed, 82 insertions(+), 18 deletions(-) diff --git a/cpp/include/cugraph_c/algorithms.h b/cpp/include/cugraph_c/algorithms.h index 89723a7d25b..dd5a7d850fb 100644 --- a/cpp/include/cugraph_c/algorithms.h +++ b/cpp/include/cugraph_c/algorithms.h @@ -377,12 +377,12 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_weights( cugraph_random_walk_result_t* result); /** - * @brief If the random walk result is compressed, get the offsets + * @brief If the random walk result is compressed, get the path sizes * * @param [in] result The result from a random walk algorithm - * @return type erased array pointing to the edge offsets in device memory + * @return type erased array pointing to the path sizes in device memory */ -cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_offsets( +cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_path_sizes( cugraph_random_walk_result_t* result); /** diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp index 8b10e8a35e3..c2e7985d063 100644 --- a/cpp/src/c_api/random_walks.cpp +++ b/cpp/src/c_api/random_walks.cpp @@ -34,7 +34,7 @@ struct cugraph_random_walk_result_t { size_t max_path_length_; cugraph_type_erased_device_array_t* paths_; cugraph_type_erased_device_array_t* weights_; - cugraph_type_erased_device_array_t* offsets_; + cugraph_type_erased_device_array_t* sizes_; }; struct node2vec_functor : public abstract_functor { @@ -112,7 +112,7 @@ struct node2vec_functor : public abstract_functor { // FIXME: Forcing this to edge_t for now. What should it really be? 
// Seems like it should be the smallest size that can accommodate // max_depth_ * sources_->size_ - auto [paths, weights, offsets] = cugraph::random_walks( + auto [paths, weights, sizes] = cugraph::random_walks( handle_, graph_view, sources.data(), @@ -127,7 +127,7 @@ struct node2vec_functor : public abstract_functor { max_depth_, new cugraph_type_erased_device_array_t(paths, graph_->vertex_type_), new cugraph_type_erased_device_array_t(weights, graph_->weight_type_), - new cugraph_type_erased_device_array_t(offsets, graph_->vertex_type_)}; + new cugraph_type_erased_device_array_t(sizes, graph_->vertex_type_)}; } } }; @@ -203,18 +203,19 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_weights( internal_pointer->weights_->view()); } -cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_offsets( +cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_path_sizes( cugraph_random_walk_result_t* result) { auto internal_pointer = reinterpret_cast(result); return reinterpret_cast( - internal_pointer->offsets_->view()); + internal_pointer->sizes_->view()); } void cugraph_random_walk_result_free(cugraph_random_walk_result_t* result) { auto internal_pointer = reinterpret_cast(result); delete internal_pointer->paths_; + delete internal_pointer->sizes_; delete internal_pointer->weights_; delete internal_pointer; } diff --git a/cpp/tests/c_api/node2vec_test.c b/cpp/tests/c_api/node2vec_test.c index 7266583c11e..7a0b479b9ac 100644 --- a/cpp/tests/c_api/node2vec_test.c +++ b/cpp/tests/c_api/node2vec_test.c @@ -31,9 +31,11 @@ int generic_node2vec_test(vertex_t* h_src, vertex_t* h_dst, weight_t* h_wgt, vertex_t* h_seeds, + size_t num_vertices, size_t num_edges, size_t num_seeds, size_t max_depth, + bool_t compressed_result, double p, double q, bool_t store_transposed) @@ -67,10 +69,11 @@ int generic_node2vec_test(vertex_t* h_src, TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); 
ret_code = cugraph_node2vec( - p_handle, p_graph, p_source_view, max_depth, FALSE, p, q, &p_result, &ret_error); + p_handle, p_graph, p_source_view, max_depth, compressed_result, p, q, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "node2vec failed failed."); cugraph_type_erased_device_array_view_t* paths; + cugraph_type_erased_device_array_view_t* path_sizes; cugraph_type_erased_device_array_view_t* weights; size_t max_path_length; @@ -92,7 +95,6 @@ int generic_node2vec_test(vertex_t* h_src, // We can easily validate that the results of node2vec // are feasible by converting the sparse (h_src,h_dst,h_wgt) // into a dense host matrix and check each path. - int num_vertices = 5; weight_t M[num_vertices][num_vertices]; for (int i = 0; i < num_vertices; ++i) @@ -102,13 +104,39 @@ int generic_node2vec_test(vertex_t* h_src, for (int i = 0; i < num_edges; ++i) M[h_src[i]][h_dst[i]] = h_wgt[i]; - for (int i = 0; (i < num_seeds) && (test_ret_value == 0); ++i) { - for (int j = 0; (j < (max_path_length-1)) && (test_ret_value == 0); ++j) { - TEST_ASSERT(test_ret_value, - nearlyEqual(h_weights[i * (max_path_length - 1)], - M[h_paths[i * max_path_length]][h_paths[i * max_path_length + 1]], - EPSILON), - "node2vec weights don't match"); + if (compressed_result) { + path_sizes = cugraph_random_walk_result_get_path_sizes(p_result); + + edge_t h_path_sizes[num_seeds]; + edge_t h_path_offsets[num_seeds + 1]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_path_sizes, path_sizes, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + h_path_offsets[0] = 0; + for (int i = 0; i < num_seeds; ++i) + h_path_offsets[i + 1] = h_path_offsets[i] + h_path_sizes[i]; + + for (int i = 0; (i < num_seeds) && (test_ret_value == 0); ++i) { + for (int j = h_path_offsets[i]; j < (h_path_offsets[i + 1] - 1); ++j) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_weights[j - i], 
M[h_paths[j]][h_paths[j + 1]], EPSILON), + "node2vec weights don't match"); + } + } + } else { + for (int i = 0; (i < num_seeds) && (test_ret_value == 0); ++i) { + for (int j = 0; (j < (max_path_length - 1)) && (test_ret_value == 0); ++j) { + if (h_paths[i * max_path_length + j + 1] != num_vertices) { + TEST_ASSERT( + test_ret_value, + nearlyEqual(h_weights[i * (max_path_length - 1) + j], + M[h_paths[i * max_path_length + j]][h_paths[i * max_path_length + j + 1]], + EPSILON), + "node2vec weights don't match"); + } + } } } @@ -126,12 +154,47 @@ int test_node2vec() vertex_t seeds[] = {0, 0}; size_t max_depth = 4; - return generic_node2vec_test(src, dst, wgt, seeds, num_edges, 2, max_depth, 0.8, 0.5, FALSE); + return generic_node2vec_test( + src, dst, wgt, seeds, num_vertices, num_edges, 2, max_depth, FALSE, 0.8, 0.5, FALSE); +} + +int test_node2vec_short_dense() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t seeds[] = {2, 3}; + size_t max_depth = 4; + + return generic_node2vec_test( + src, dst, wgt, seeds, num_vertices, num_edges, 2, max_depth, FALSE, 0.8, 0.5, FALSE); +} + +int test_node2vec_short_sparse() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t seeds[] = {2, 3}; + size_t max_depth = 4; + + // FIXME: max_depth seems to be off by 1. It's counting vertices + // instead of edges. 
+ return generic_node2vec_test( + src, dst, wgt, seeds, num_vertices, num_edges, 2, max_depth, TRUE, 0.8, 0.5, FALSE); } int main(int argc, char** argv) { int result = 0; result |= RUN_TEST(test_node2vec); + result |= RUN_TEST(test_node2vec_short_dense); + result |= RUN_TEST(test_node2vec_short_sparse); return result; } From dd6cd6372f7bbc5b95635b0b5bbea7d6f36c8d6a Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 16 Feb 2022 15:39:51 -0500 Subject: [PATCH 6/8] apply technique from PR 2077 to node2vec CAPI implementation --- cpp/src/c_api/random_walks.cpp | 50 ++++++++-------------------------- 1 file changed, 11 insertions(+), 39 deletions(-) diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp index c2e7985d063..3f58a3d20b2 100644 --- a/cpp/src/c_api/random_walks.cpp +++ b/cpp/src/c_api/random_walks.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -47,17 +48,18 @@ struct node2vec_functor : public abstract_functor { double q_; cugraph_random_walk_result_t* result_{}; - node2vec_functor(raft::handle_t const& handle, - cugraph_graph_t* graph, - cugraph_type_erased_device_array_view_t const* sources, + node2vec_functor(cugraph_resource_handle_t const* handle, + ::cugraph_graph_t* graph, + ::cugraph_type_erased_device_array_view_t const* sources, size_t max_depth, bool compress_result, double p, double q) : abstract_functor(), - handle_(handle), - graph_(graph), - sources_(sources), + handle_(*reinterpret_cast(handle)), + graph_(reinterpret_cast(graph)), + sources_( + reinterpret_cast(sources)), max_depth_(max_depth), compress_result_(compress_result), p_(p), @@ -145,40 +147,10 @@ cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle, cugraph_random_walk_result_t** result, cugraph_error_t** error) { - *result = nullptr; - *error = nullptr; - - try { - auto p_handle = reinterpret_cast(handle); - auto p_graph = reinterpret_cast(graph); - auto p_sources = - reinterpret_cast(sources); - - 
cugraph::c_api::node2vec_functor functor( - *p_handle, p_graph, p_sources, max_depth, compress_results, p, q); - - // FIXME: This seems like a recurring pattern. Can I encapsulate - // The vertex_dispatcher and error handling calls into a reusable function? - // After all, we're in C++ here. - cugraph::dispatch::vertex_dispatcher(cugraph::c_api::dtypes_mapping[p_graph->vertex_type_], - cugraph::c_api::dtypes_mapping[p_graph->edge_type_], - cugraph::c_api::dtypes_mapping[p_graph->weight_type_], - p_graph->store_transposed_, - p_graph->multi_gpu_, - functor); - - if (functor.error_code_ != CUGRAPH_SUCCESS) { - *error = reinterpret_cast(functor.error_.release()); - return functor.error_code_; - } - - *result = reinterpret_cast(functor.result_); - } catch (std::exception const& ex) { - *error = reinterpret_cast(new cugraph::c_api::cugraph_error_t{ex.what()}); - return CUGRAPH_UNKNOWN_ERROR; - } + cugraph::c_api::node2vec_functor functor( + handle, graph, sources, max_depth, compress_results, p, q); - return CUGRAPH_SUCCESS; + return cugraph::c_api::run_algorithm(graph, functor, result, error); } size_t cugraph_random_walk_result_get_max_path_length(cugraph_random_walk_result_t* result) From 79737161b91ce91eb4109b781c498e85de3e3542 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 16 Feb 2022 18:04:37 -0500 Subject: [PATCH 7/8] add initializers for class variables --- cpp/src/c_api/random_walks.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp index 3f58a3d20b2..079821813e8 100644 --- a/cpp/src/c_api/random_walks.cpp +++ b/cpp/src/c_api/random_walks.cpp @@ -31,22 +31,22 @@ namespace cugraph { namespace c_api { struct cugraph_random_walk_result_t { - bool result_compressed_; - size_t max_path_length_; - cugraph_type_erased_device_array_t* paths_; - cugraph_type_erased_device_array_t* weights_; - cugraph_type_erased_device_array_t* sizes_; + bool 
result_compressed_{false}; + size_t max_path_length_{0}; + cugraph_type_erased_device_array_t* paths_{nullptr}; + cugraph_type_erased_device_array_t* weights_{nullptr}; + cugraph_type_erased_device_array_t* sizes_{nullptr}; }; struct node2vec_functor : public abstract_functor { raft::handle_t const& handle_; - cugraph_graph_t* graph_; - cugraph_type_erased_device_array_view_t const* sources_; - size_t max_depth_; - bool compress_result_; - double p_; - double q_; - cugraph_random_walk_result_t* result_{}; + cugraph_graph_t* graph_{nullptr}; + cugraph_type_erased_device_array_view_t const* sources_{nullptr}; + size_t max_depth_{0}; + bool compress_result_{false}; + double p_{0}; + double q_{0}; + cugraph_random_walk_result_t* result_{nullptr}; node2vec_functor(cugraph_resource_handle_t const* handle, ::cugraph_graph_t* graph, From 0a8e10edda060c10192f10852d4a349827452ae3 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 16 Feb 2022 18:59:31 -0500 Subject: [PATCH 8/8] add FIXME --- cpp/include/cugraph/api_helpers.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/include/cugraph/api_helpers.hpp b/cpp/include/cugraph/api_helpers.hpp index 136a60453ec..98cc4058508 100644 --- a/cpp/include/cugraph/api_helpers.hpp +++ b/cpp/include/cugraph/api_helpers.hpp @@ -36,6 +36,9 @@ struct sampling_params_t { { } + // FIXME: The new C API uses the above constructor; this constructor + // is only used by the legacy python/cython calls. It should be + // removed once it is no longer called. sampling_params_t(int sampling_type, double p = 1.0, double q = 1.0, bool use_alpha_cache = false) : sampling_type_(static_cast(sampling_type)), p_(p),