Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add node2vec C API implementation #2069

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ add_library(cugraph_c SHARED
src/c_api/bfs.cpp
src/c_api/sssp.cpp
src/c_api/extract_paths.cpp
src/c_api/random_walks.cpp
)
add_library(cugraph::cugraph_c ALIAS cugraph_c)

Expand Down
10 changes: 9 additions & 1 deletion cpp/include/cugraph/api_helpers.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,6 +28,14 @@ enum class sampling_strategy_t : int { UNIFORM = 0, BIASED, NODE2VEC };
struct sampling_params_t {
sampling_params_t(void) {}

sampling_params_t(sampling_strategy_t sampling_type,
double p = 1.0,
double q = 1.0,
bool use_alpha_cache = false)
: sampling_type_(sampling_type), p_(p), q_(q), use_alpha_cache_(use_alpha_cache)
{
}

sampling_params_t(int sampling_type, double p = 1.0, double q = 1.0, bool use_alpha_cache = false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we taking int sampling_type here but sampling_strategy_t sampling_type above? Better be consistent.

And more fundamentally, why do we have two functions seemingly doing the same thing?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original code (developed for C++) included a constructor that takes an int sampling_type and converts it into the enum. This original code is used by the current python/cython interface.

In implementing the C code, I wanted to use the enum directly (I think it's more clear in the calling code to use the enum). I added the new constructor to allow for construction from the enum directly. I intend to remove the old constructor once the python/cython code uses the new C API.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, and in that case, FIXME statement stating the intention will make this clearer.

: sampling_type_(static_cast<sampling_strategy_t>(sampling_type)),
p_(p),
Expand Down
21 changes: 16 additions & 5 deletions cpp/include/cugraph_c/algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,8 @@ typedef struct {
* needs to be transposed
* @param [in] sources Array of source vertices
* @param [in] max_depth Maximum length of the generated path
* @param [in] compress_result If true, return the paths as a compressed sparse row matrix,
* otherwise return as a dense matrix
* @param [in] p The return parameter
* @param [in] q The in/out parameter
* @param [in] result Output from the node2vec call
Expand All @@ -339,7 +341,7 @@ cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_type_erased_device_array_view_t* sources,
size_t max_depth,
bool_t flag_use_padding,
bool_t compress_result,
ChuckHastings marked this conversation as resolved.
Show resolved Hide resolved
double p,
double q,
cugraph_random_walk_result_t** result,
Expand All @@ -359,21 +361,30 @@ size_t cugraph_random_walk_result_get_max_path_length(cugraph_random_walk_result
/**
* @brief Get the matrix (row major order) of vertices in the paths
*
* @param [in] result The result from extract_paths
* @return type erased array pointing to the matrix in device memory
* @param [in] result The result from a random walk algorithm
* @return type erased array pointing to the path matrix in device memory
*/
cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_paths(
cugraph_random_walk_result_t* result);

/**
* @brief Get the matrix (row major order) of edge weights in the paths
*
* @param [in] result The result from extract_paths
* @return type erased array pointing to the edge weights in device memory
* @param [in] result The result from a random walk algorithm
* @return type erased array pointing to the path edge weights in device memory
*/
cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_weights(
cugraph_random_walk_result_t* result);

/**
* @brief If the random walk result is compressed, get the offsets
*
* @param [in] result The result from a random walk algorithm
* @return type erased array pointing to the edge offsets in device memory
*/
cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_offsets(
cugraph_random_walk_result_t* result);

/**
* @brief Free random walks result
*
Expand Down
220 changes: 220 additions & 0 deletions cpp/src/c_api/random_walks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cugraph_c/algorithms.h>

#include <c_api/abstract_functor.hpp>
#include <c_api/graph.hpp>

#include <cugraph/algorithms.hpp>
#include <cugraph/detail/utility_wrappers.hpp>
#include <cugraph/graph_functions.hpp>
#include <cugraph/visitors/generic_cascaded_dispatch.hpp>

#include <raft/handle.hpp>

namespace cugraph {
namespace c_api {

struct cugraph_random_walk_result_t {
bool result_compressed_;
size_t max_path_length_;
cugraph_type_erased_device_array_t* paths_;
cugraph_type_erased_device_array_t* weights_;
cugraph_type_erased_device_array_t* offsets_;
};

struct node2vec_functor : public abstract_functor {
raft::handle_t const& handle_;
cugraph_graph_t* graph_;
cugraph_type_erased_device_array_view_t const* sources_;
size_t max_depth_;
bool compress_result_;
double p_;
double q_;
cugraph_random_walk_result_t* result_{};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it our convention to define member variables before functions? And don't forget {} at the end of variable names.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the {}. Not sure what you mean by your question.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What we typically have is

struct {
  functions(); ...
  variables; ...
};

but here

struct {
  variables;...
  functions(); ...
 };

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason for this?

Copy link
Collaborator Author

@ChuckHastings ChuckHastings Feb 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. I interpreted your question as the opposite - hence my confusion.

What I have generally seen in our code base is that when we define a class we define it as:

class {
public:
   functions;
private:
   private_functions;
   variables;
};

But when we define a struct (essentially the same, only real difference is that everything is public) we define it as:

struct {
  variables;
  functions;
};

Admittedly, most of the structs that we define are to work around the nvcc problem with lambdas. My feeling (from a design perspective) was that this functor is really a heavier weight lambda. But you are correct in observing that it is more class-like. It does derive from an abstract struct, hence the need for a constructor as well as an operator()().

This is consistent across all of the C API functions. We can certainly change it if we like, but if we do we should fix the entire C API (and better to do it before we add too many more calls).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I guess the sampling_params_t is a struct but follows the class description in my above comment. So we have inconsistency there as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, yes, you're right, actually, I also do {vairables; operators();} for struct (i.e. functors). Seeing https://github.com/rapidsai/cugraph/pull/2069/files#diff-1264d5c0712616b080bb085897241ca6ad0bd8d4d132c83a75be4edf229ce988L28, I erroneously thought that we do functions first (do we consider constructors as something special, or should this better be class, or should we resolve inconsistencies in ordering?).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@seunghwak - I'll make a note to consider this and write up some options for us to discuss.


node2vec_functor(raft::handle_t const& handle,
cugraph_graph_t* graph,
cugraph_type_erased_device_array_view_t const* sources,
size_t max_depth,
bool compress_result,
double p,
double q)
: abstract_functor(),
handle_(handle),
graph_(graph),
sources_(sources),
max_depth_(max_depth),
compress_result_(compress_result),
p_(p),
q_(q)
{
}

template <typename vertex_t,
typename edge_t,
typename weight_t,
bool store_transposed,
bool multi_gpu>
void operator()()
{
// FIXME: Think about how to handle SG vice MG
if constexpr (!cugraph::is_candidate<vertex_t, edge_t, weight_t>::value) {
unsupported();
} else if constexpr (multi_gpu) {
unsupported();
} else {
// node2vec expects store_transposed == false
if constexpr (store_transposed) {
error_code_ = cugraph::c_api::
transpose_storage<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>(
handle_, graph_, error_.get());
if (error_code_ != CUGRAPH_SUCCESS) return;
}

auto graph =
reinterpret_cast<cugraph::graph_t<vertex_t, edge_t, weight_t, false, multi_gpu>*>(
graph_->graph_);

auto graph_view = graph->view();

auto number_map = reinterpret_cast<rmm::device_uvector<vertex_t>*>(graph_->number_map_);

rmm::device_uvector<vertex_t> sources(sources_->size_, handle_.get_stream());
raft::copy(
sources.data(), sources_->as_type<vertex_t>(), sources.size(), handle_.get_stream());

//
// Need to renumber sources
//
renumber_ext_vertices<vertex_t, multi_gpu>(handle_,
sources.data(),
sources.size(),
number_map->data(),
graph_view.get_local_vertex_first(),
graph_view.get_local_vertex_last(),
false);

// FIXME: Forcing this to edge_t for now. What should it really be?
// Seems like it should be the smallest size that can accommodate
// max_depth_ * sources_->size_
auto [paths, weights, offsets] = cugraph::random_walks(
handle_,
graph_view,
sources.data(),
static_cast<edge_t>(sources.size()),
static_cast<edge_t>(max_depth_),
!compress_result_,
// std::make_unique<sampling_params_t>(2, p_, q_, false));
std::make_unique<sampling_params_t>(cugraph::sampling_strategy_t::NODE2VEC, p_, q_));

result_ = new cugraph_random_walk_result_t{
compress_result_,
max_depth_,
new cugraph_type_erased_device_array_t(paths, graph_->vertex_type_),
new cugraph_type_erased_device_array_t(weights, graph_->weight_type_),
new cugraph_type_erased_device_array_t(offsets, graph_->vertex_type_)};
}
}
};

} // namespace c_api
} // namespace cugraph

cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_type_erased_device_array_view_t* sources,
size_t max_depth,
bool_t compress_results,
double p,
double q,
cugraph_random_walk_result_t** result,
cugraph_error_t** error)
{
*result = nullptr;
*error = nullptr;

try {
auto p_handle = reinterpret_cast<raft::handle_t const*>(handle);
auto p_graph = reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph);
auto p_sources =
reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(sources);

cugraph::c_api::node2vec_functor functor(
*p_handle, p_graph, p_sources, max_depth, compress_results, p, q);

// FIXME: This seems like a recurring pattern. Can I encapsulate
// The vertex_dispatcher and error handling calls into a reusable function?
// After all, we're in C++ here.
cugraph::dispatch::vertex_dispatcher(cugraph::c_api::dtypes_mapping[p_graph->vertex_type_],
cugraph::c_api::dtypes_mapping[p_graph->edge_type_],
cugraph::c_api::dtypes_mapping[p_graph->weight_type_],
p_graph->store_transposed_,
p_graph->multi_gpu_,
functor);

if (functor.error_code_ != CUGRAPH_SUCCESS) {
*error = reinterpret_cast<cugraph_error_t*>(functor.error_.release());
return functor.error_code_;
}

*result = reinterpret_cast<cugraph_random_walk_result_t*>(functor.result_);
} catch (std::exception const& ex) {
*error = reinterpret_cast<cugraph_error_t*>(new cugraph::c_api::cugraph_error_t{ex.what()});
return CUGRAPH_UNKNOWN_ERROR;
}

return CUGRAPH_SUCCESS;
}

size_t cugraph_random_walk_result_get_max_path_length(cugraph_random_walk_result_t* result)
{
auto internal_pointer = reinterpret_cast<cugraph::c_api::cugraph_random_walk_result_t*>(result);
return internal_pointer->max_path_length_;
}

cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_paths(
cugraph_random_walk_result_t* result)
{
auto internal_pointer = reinterpret_cast<cugraph::c_api::cugraph_random_walk_result_t*>(result);
return reinterpret_cast<cugraph_type_erased_device_array_view_t*>(
internal_pointer->paths_->view());
}

cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_weights(
cugraph_random_walk_result_t* result)
{
auto internal_pointer = reinterpret_cast<cugraph::c_api::cugraph_random_walk_result_t*>(result);
return reinterpret_cast<cugraph_type_erased_device_array_view_t*>(
internal_pointer->weights_->view());
}

cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_offsets(
cugraph_random_walk_result_t* result)
{
auto internal_pointer = reinterpret_cast<cugraph::c_api::cugraph_random_walk_result_t*>(result);
return reinterpret_cast<cugraph_type_erased_device_array_view_t*>(
internal_pointer->offsets_->view());
}

void cugraph_random_walk_result_free(cugraph_random_walk_result_t* result)
{
auto internal_pointer = reinterpret_cast<cugraph::c_api::cugraph_random_walk_result_t*>(result);
delete internal_pointer->paths_;
delete internal_pointer->weights_;
delete internal_pointer;
}
Loading