From 6a948025ccde80dcbc4d2125e1c2a6e06c0f3d00 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Thu, 13 May 2021 10:11:36 -0400 Subject: [PATCH 1/4] Fixing broken path utility (#1576) Fix: - Broken path utility Authors: - Brad Rees (https://github.com/BradReesWork) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/1576 --- python/cugraph/utilities/utils.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index adaec0f9e44..01bae895e5a 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -62,8 +62,8 @@ def get_traversed_path(df, id): ---------- df : cudf.DataFrame The dataframe containing the results of a BFS or SSSP call - id : Int - The vertex ID + id : vertex ID + must be the same data types as what is in the dataframe Returns --------- @@ -97,8 +97,9 @@ def get_traversed_path(df, id): "DataFrame does not appear to be a BFS or " "SSP result - 'predecessor' column missing" ) - if type(id) != int: - raise ValueError("The vertex 'id' needs to be an integer") + if isinstance(id, type(df['vertex'].iloc[0])): + raise ValueError( + "The vertex 'id' needs to be the same as df['vertex']") # There is no guarantee that the dataframe has not been filtered # or edited. Therefore we cannot assume that using the vertex ID @@ -161,8 +162,9 @@ def get_traversed_path_list(df, id): "DataFrame does not appear to be a BFS or " "SSP result - 'predecessor' column missing" ) - if type(id) != int: - raise ValueError("The vertex 'id' needs to be an integer") + if isinstance(id, type(df['vertex'].iloc[0])): + raise ValueError( + "The vertex 'id' needs to be the same as df['vertex']") # There is no guarantee that the dataframe has not been filtered # or edited. 
Therefore we cannot assume that using the vertex ID From 9853ecaa7c799919f93742358eb6b0a8135c5e61 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Thu, 13 May 2021 10:27:25 -0400 Subject: [PATCH 2/4] Update the relabel function to skip relabeling on missing labels (#1598) Relabel function relabels old labels to new labels based on the provided (old_label, new_label) pairs. The provided pairs should include the entire set of old labels to be relabeled. This update adds an additional parameter ```skip_missing_labels``` to skip relabeling for labels missing in the provided pairs. This update is necessary for the new WCC implementation. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/1598 --- .../cugraph/experimental/graph_functions.hpp | 5 +++ cpp/src/community/flatten_dendrogram.cuh | 3 +- cpp/src/experimental/coarsen_graph.cu | 2 +- cpp/src/experimental/louvain.cuh | 3 +- cpp/src/experimental/relabel.cu | 45 ++++++++++++++++--- 5 files changed, 48 insertions(+), 10 deletions(-) diff --git a/cpp/include/cugraph/experimental/graph_functions.hpp b/cpp/include/cugraph/experimental/graph_functions.hpp index cb1b90a6e8e..36881aea5fa 100644 --- a/cpp/include/cugraph/experimental/graph_functions.hpp +++ b/cpp/include/cugraph/experimental/graph_functions.hpp @@ -338,6 +338,10 @@ coarsen_graph( * @param labels Labels to be relabeled. This initially holds old labels. Old labels are updated to * new labels in-place ([INOUT] parameter). * @param num_labels Number of labels to be relabeled. + * @param skip_missing_labels Flag dictating the behavior on missing labels (@p labels contains old + * labels missing in @p old_new_label_pairs). If set to true, missing elements are skipped (not + * relabeled). 
If set to false, undefined behavior (if @p do_expensive_check is set to true, this + * function will throw an exception). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return rmm::device_uvector New labels corresponding to the @p old_labels. */ @@ -347,6 +351,7 @@ void relabel(raft::handle_t const& handle, vertex_t num_label_pairs, vertex_t* labels /* [INOUT] */, vertex_t num_labels, + bool skip_missing_labels, bool do_expensive_check = false); /** diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh index 3359fea87e5..ff6446b0e5f 100644 --- a/cpp/src/community/flatten_dendrogram.cuh +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -51,7 +51,8 @@ void partition_at_level(raft::handle_t const &handle, dendrogram.get_level_ptr_nocheck(l)), dendrogram.get_level_size_nocheck(l), d_partition, - local_num_verts); + local_num_verts, + false); }); } diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 2e476a9b54d..9f3f7c968cc 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -284,7 +284,7 @@ coarsen_graph( store_transposed ? 
graph_view.get_number_of_local_adj_matrix_partition_cols(i) : graph_view.get_number_of_local_adj_matrix_partition_rows(i), handle.get_stream()); - if (col_comm_rank == i) { + if (col_comm_rank == static_cast(i)) { // FIXME: this copy is unnecessary, beter fix RAFT comm's bcast to take const iterators for // input thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 582b07d39d2..1e618482f68 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -527,7 +527,8 @@ class Louvain { static_cast(numbering_indices.begin())), current_graph_view_.get_number_of_local_vertices(), dendrogram_->current_level_begin(), - dendrogram_->current_level_size()); + dendrogram_->current_level_size(), + false); timer_stop(handle_.get_stream()); } diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index eb257acf432..5360fad0031 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -49,6 +49,7 @@ void relabel(raft::handle_t const& handle, vertex_t num_label_pairs, vertex_t* labels /* [INOUT] */, vertex_t num_labels, + bool skip_missing_labels, bool do_expensive_check) { double constexpr load_factor = 0.7; @@ -156,11 +157,24 @@ void relabel(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - relabel_map.find( - rx_unique_old_labels.begin(), - rx_unique_old_labels.end(), - rx_unique_old_labels - .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels + if (skip_missing_labels) { + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels.begin(), + [view = relabel_map.get_device_view()] __device__(auto old_label) { + auto found = view.find(old_label); + return found != 
view.end() ? view.find(old_label)->second.load( + cuda::std::memory_order_relaxed) + : old_label; + }); + } else { + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels.begin()); // now rx_unique_old_lables hold new labels for the + // corresponding old labels + } std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); @@ -201,10 +215,23 @@ void relabel(raft::handle_t const& handle, }); relabel_map.insert(pair_first, pair_first + num_label_pairs); - relabel_map.find(labels, labels + num_labels, labels); + if (skip_missing_labels) { + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + num_labels, + labels, + [view = relabel_map.get_device_view()] __device__(auto old_label) { + auto found = view.find(old_label); + return found != view.end() ? view.find(old_label)->second.load( + cuda::std::memory_order_relaxed) + : old_label; + }); + } else { + relabel_map.find(labels, labels + num_labels, labels); + } } - if (do_expensive_check) { + if (do_expensive_check && !skip_missing_labels) { CUGRAPH_EXPECTS( thrust::count(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), labels, @@ -224,6 +251,7 @@ template void relabel(raft::handle_t const& handle, int32_t num_label_pairs, int32_t* labels, int32_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel( @@ -232,6 +260,7 @@ template void relabel( int32_t num_label_pairs, int32_t* labels, int32_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel(raft::handle_t const& handle, @@ -239,6 +268,7 @@ template void relabel(raft::handle_t const& handle, int64_t num_label_pairs, int64_t* labels, int64_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel( @@ -247,6 +277,7 @@ template void relabel( int64_t 
num_label_pairs, int64_t* labels, int64_t num_labels, + bool skip_missing_labels, bool do_expensive_check); } // namespace experimental From 7c326ac102ecb5bc75f4d148ceedcb40694c8a3e Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Thu, 13 May 2021 10:30:03 -0400 Subject: [PATCH 3/4] Update release script (#1600) Update the release script to take a parameter with the new version instead of calculating the new version. Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cugraph/pull/1600 --- ci/release/update-version.sh | 45 +++++++++++------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 7cd0d9720fc..f381ed7f6fb 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -13,42 +13,25 @@ # limitations under the License. 
## Usage -# bash update-version.sh <type> -# where <type> is either `major`, `minor`, `patch` +# bash update-version.sh <new_version> -set -e -# Grab argument for release type -RELEASE_TYPE=$1 +# Format is YY.MM.PP - no leading 'v' or trailing 'a' +NEXT_FULL_TAG=$1 -# Get current version and calculate next versions -CURRENT_TAG=`git tag | grep -xE 'v[0-9\.]+' | sort --version-sort | tail -n 1 | tr -d 'v'` -CURRENT_MAJOR=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}'` -CURRENT_MINOR=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}'` -CURRENT_PATCH=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}'` +# Get current version +CURRENT_TAG=$(git tag | grep -xE 'v[0-9\.]+' | sort --version-sort | tail -n 1 | tr -d 'v') +CURRENT_MAJOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}') +CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') +CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} -NEXT_MAJOR=$((CURRENT_MAJOR + 1)) -NEXT_MINOR=$((CURRENT_MINOR + 1)) -NEXT_PATCH=$((CURRENT_PATCH + 1)) -NEXT_FULL_TAG="" -NEXT_SHORT_TAG="" -# Determine release type -if [ "$RELEASE_TYPE" == "major" ]; then - NEXT_FULL_TAG="${NEXT_MAJOR}.0.0" - NEXT_SHORT_TAG="${NEXT_MAJOR}.0" -elif [ "$RELEASE_TYPE" == "minor" ]; then - NEXT_FULL_TAG="${CURRENT_MAJOR}.${NEXT_MINOR}.0" - NEXT_SHORT_TAG="${CURRENT_MAJOR}.${NEXT_MINOR}" -elif [ "$RELEASE_TYPE" == "patch" ]; then - NEXT_FULL_TAG="${CURRENT_MAJOR}.${CURRENT_MINOR}.${NEXT_PATCH}" - NEXT_SHORT_TAG="${CURRENT_MAJOR}.${CURRENT_MINOR}" -else - echo "Incorrect release type; use 'major', 'minor', or 'patch' as an argument" - exit 1 -fi +#Get <major>.<minor> 
for next version +NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') +NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') +NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} -echo "Preparing '$RELEASE_TYPE' release [$CURRENT_TAG -> $NEXT_FULL_TAG]" +echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" # Inplace sed replace; workaround for Linux and Mac function sed_runner() { @@ -69,4 +52,4 @@ for FILE in conda/environments/*.yml; do sed_runner "s/dask-cudf=${CURRENT_SHORT_TAG}/dask-cudf=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/ucx-py=${CURRENT_SHORT_TAG}/ucx-py=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cuxfilter=${CURRENT_SHORT_TAG}/cuxfilter=${NEXT_SHORT_TAG}/g" ${FILE}; -done +done \ No newline at end of file From 9c42957fe98f913e7c3d55fb8788bf47db60b23e Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 13 May 2021 09:35:34 -0500 Subject: [PATCH 4/4] Added `uninstall` target which uninstalls libcugraph and cugraph from a prior build/install step (#1601) Added `uninstall` target which uninstalls libcugraph and cugraph from a prior build/install step. Also extended the `clean` target to remove inplace artifacts (mainly cython-generated cpp files and inplace `.so` python extensions built from cython). 
NOTE: since targets can be combined on the same command, a user can get the "scrub" behavior by running: ``` ./build.sh uninstall clean ``` Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/1601 --- build.sh | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/build.sh b/build.sh index 7c99b27f632..682e9b5ed33 100755 --- a/build.sh +++ b/build.sh @@ -19,10 +19,11 @@ ARGS=$* REPODIR=$(cd $(dirname $0); pwd) LIBCUGRAPH_BUILD_DIR=${LIBCUGRAPH_BUILD_DIR:=${REPODIR}/cpp/build} -VALIDARGS="clean libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" +VALIDARGS="clean uninstall libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) + uninstall - uninstall libcugraph and cugraph from a prior build/install (see also -n) libcugraph - build the cugraph C++ code cugraph - build the cugraph Python package cpp-mgtests - build libcugraph mnmg tests. Builds MPI communicator, adding MPI as a dependency. @@ -30,7 +31,7 @@ HELP="$0 [ ...] [ ...] 
and is: -v - verbose build mode -g - build for debug - -n - no install step + -n - do not install after a successful build --allgpuarch - build for all supported GPU architectures --buildfaiss - build faiss statically into cugraph --show_depr_warn - show cmake deprecation warnings @@ -107,12 +108,36 @@ if hasArg cpp-mgtests; then BUILD_CPP_MG_TESTS=ON fi -# If clean given, run it prior to any other steps +# If clean or uninstall given, run them prior to any other steps +if hasArg uninstall; then + # uninstall libcugraph + if [[ "$INSTALL_PREFIX" != "" ]]; then + rm -rf ${INSTALL_PREFIX}/include/cugraph + rm -f ${INSTALL_PREFIX}/lib/libcugraph.so + fi + # This may be redundant given the above, but can also be used in case + # there are other installed files outside of the locations above. + if [ -e ${LIBCUGRAPH_BUILD_DIR}/install_manifest.txt ]; then + xargs rm -f < ${LIBCUGRAPH_BUILD_DIR}/install_manifest.txt > /dev/null 2>&1 + fi + # uninstall cugraph installed from a prior "setup.py install" + pip uninstall -y cugraph +fi + if hasArg clean; then - # FIXME: ideally the "setup.py clean" command below would also be run to - # remove all the "inplace" python build artifacts, but currently, running - # any setup.py command has side effects (eg. cloning repos). - #(cd ${REPODIR}/python && python setup.py clean) + # remove artifacts generated inplace + # FIXME: ideally the "setup.py clean" command would be used for this, but + # currently running any setup.py command has side effects (eg. cloning + # repos). + # (cd ${REPODIR}/python && python setup.py clean) + if [[ -d ${REPODIR}/python ]]; then + pushd ${REPODIR}/python > /dev/null + rm -rf dist dask-worker-space cugraph/raft *.egg-info + find . -name "__pycache__" -type d -exec rm -rf {} \; > /dev/null 2>&1 + find . -name "*.cpp" -type f -delete + find . 
-name "*.cpython*.so" -type f -delete + popd > /dev/null + fi # If the dirs to clean are mounted dirs in a container, the contents should # be removed but the mounted dirs will remain. The find removes all