diff --git a/CHANGELOG.md b/CHANGELOG.md index ac6c1b9400d..b8e2f034623 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,8 @@ - PR #1025: Explicitly skip raft test folder for pytest 6.0.0 - PR #1027 Fix documentation - PR #1033 Fix reparition error in big datasets, updated coroutine, fixed warnings +- PR #1036 Fixed benchmarks for new renumbering API, updated comments, added quick test-only benchmark run to CI +- PR #1040 Fix spectral clustering renumbering issue # cuGraph 0.14.0 (03 Jun 2020) diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py index 836ed61013e..2279a5aee21 100644 --- a/benchmarks/bench_algos.py +++ b/benchmarks/bench_algos.py @@ -17,6 +17,7 @@ def setFixtureParamNames(*args, **kwargs): pass import cugraph +from cugraph.structure.number_map import NumberMap from cugraph.tests import utils import rmm @@ -174,9 +175,7 @@ def bench_create_digraph(gpubenchmark, edgelistCreated): @pytest.mark.ETL def bench_renumber(gpubenchmark, edgelistCreated): - gpubenchmark(cugraph.renumber, - edgelistCreated["0"], # src - edgelistCreated["1"]) # dst + gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1") def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed): diff --git a/benchmarks/params.py b/benchmarks/params.py index 949e7fbde86..72a0d24467e 100644 --- a/benchmarks/params.py +++ b/benchmarks/params.py @@ -60,6 +60,8 @@ def genFixtureParamsProduct(*args): # https://docs.rapids.ai/maintainers/datasets # FIXME: rlr: soc-twitter-2010.csv crashes with OOM error on my RTX-8000 UNDIRECTED_DATASETS = [ + pytest.param("../datasets/karate.csv", + marks=[pytest.mark.tiny, pytest.mark.undirected]), pytest.param("../datasets/csv/undirected/hollywood.csv", marks=[pytest.mark.small, pytest.mark.undirected]), pytest.param("../datasets/csv/undirected/europe_osm.csv", diff --git a/benchmarks/pytest.ini b/benchmarks/pytest.ini index 7096c677b3b..06a67a06040 100644 --- a/benchmarks/pytest.ini +++ b/benchmarks/pytest.ini @@ -12,6 +12,7 @@ markers = poolallocator_off: RMM pool allocator disabled ETL: benchmarks for ETL steps small: small datasets + tiny: tiny datasets directed: directed datasets undirected: undirected datasets diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index db72a900ca2..a65988e1b1d 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -64,9 +64,10 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul "dask-cuda=${MINOR_VERSION}" \ "ucx-py=${MINOR_VERSION}" \ "rapids-build-env=$MINOR_VERSION.*" \ - "rapids-notebook-env=$MINOR_VERSION.*" + "rapids-notebook-env=$MINOR_VERSION.*" \ + rapids-pytest-benchmark -# https://docs.rapids.ai/maintainers/depmgmt/ +# https://docs.rapids.ai/maintainers/depmgmt/ # conda remove --force rapids-build-env rapids-notebook-env # conda install "your-pkg=1.0.0" diff --git a/ci/test.sh b/ci/test.sh index 2df02d0bc2a..3bbd892537b 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -59,4 +59,9 @@ cd ${CUGRAPH_ROOT}/python pytest --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph.xml -v --cov-config=.coveragerc --cov=cugraph --cov-report=xml:${WORKSPACE}/python/cugraph/cugraph-coverage.xml --cov-report term --ignore=cugraph/raft ERRORCODE=$((ERRORCODE | $?)) +echo "Python benchmarks for cuGraph (running as tests)..." +cd ${CUGRAPH_ROOT}/benchmarks +pytest -v -m "managedmem_on and poolallocator_on and tiny" --benchmark-disable +ERRORCODE=$((ERRORCODE | $?)) + exit ${ERRORCODE} diff --git a/python/cugraph/community/spectral_clustering.py b/python/cugraph/community/spectral_clustering.py index b5b11917551..a775fedb677 100644 --- a/python/cugraph/community/spectral_clustering.py +++ b/python/cugraph/community/spectral_clustering.py @@ -82,7 +82,11 @@ def spectralBalancedCutClustering( ) if G.renumbered: - df = G.unrenumber(df, "vertex") + # FIXME: This is a hack to get around an + # API problem. The spectral API assumes that + # the data frame remains in internal vertex + # id order. It should not do that. + df = G.unrenumber(df, "vertex", preserve_order=True) return df diff --git a/python/cugraph/layout/force_atlas2.py b/python/cugraph/layout/force_atlas2.py index f9269e6ceab..4a61e2a345b 100644 --- a/python/cugraph/layout/force_atlas2.py +++ b/python/cugraph/layout/force_atlas2.py @@ -128,7 +128,11 @@ def on_train_end(self, positions): verbose=verbose, callback=callback, ) - + # If the caller passed in a pos_list, those values are already mapped to + # original numbering in the call to force_atlas2_wrapper.force_atlas2(), + # but if the caller did not specify a pos_list and the graph was + # renumbered, the pos dataframe should be mapped back to the original + # numbering. if pos_list is None and input_graph.renumbered: pos = input_graph.unrenumber(pos, "vertex") diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index a0a62ee3ca3..7b465d25f5e 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -932,6 +932,7 @@ def to_directed(self): if type(self) is Graph: DiG = DiGraph() DiG.renumbered = self.renumbered + DiG.renumber_map = self.renumber_map DiG.edgelist = self.edgelist DiG.adjlist = self.adjlist DiG.transposedadjlist = self.transposedadjlist @@ -964,6 +965,7 @@ def to_undirected(self): G = Graph() df = self.edgelist.edgelist_df G.renumbered = self.renumbered + G.renumber_map = self.renumber_map if self.edgelist.weights: source_col, dest_col, value_col = symmetrize( df["src"], df["dst"], df["weights"] @@ -1103,22 +1105,6 @@ def unrenumber(self, df, column_name, preserve_order=False): The original DataFrame columns exist unmodified. The external vertex identifiers are added to the DataFrame, the internal vertex identifier column is removed from the dataframe. - - Examples - -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> - >>> df, number_map = NumberMap.renumber(df, '0', '1') - >>> - >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(df, 'src', 'dst') - >>> - >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, - >>> tol = 1.0e-05) - >>> - >>> pr = number_map.unrenumber(pr, 'vertex') - >>> """ return self.renumber_map.unrenumber(df, column_name, preserve_order) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index 1ce566ecdcb..0646c074c7f 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -669,10 +669,10 @@ def column_names(self): """ return self.implementation.col_names - def renumber(df, source_columns, dest_columns, preserve_order=False): + def renumber(df, src_col_names, dst_col_names, preserve_order=False): """ - Given a single GPU or distributed DataFrame, use source_columns and - dest_columns to identify the source vertex identifiers and destination + Given a single GPU or distributed DataFrame, use src_col_names and + dst_col_names to identify the source vertex identifiers and destination vertex identifiers, respectively. Internal vertex identifiers will be created, numbering vertices as @@ -694,11 +694,11 @@ def renumber(df, source_columns, dest_columns, preserve_order=False): df: cudf.DataFrame or dask_cudf.DataFrame Contains a list of external vertex identifiers that will be numbered by the NumberMap class. - src_col_names: list of strings + src_col_names: string or list of strings This list of 1 or more strings contain the names of the columns that uniquely identify an external vertex identifier for source vertices - dst_col_names: list of strings + dst_col_names: string or list of strings This list of 1 or more strings contain the names of the columns that uniquely identify an external vertex identifier for destination vertices @@ -729,25 +729,25 @@ def renumber(df, source_columns, dest_columns, preserve_order=False): """ renumber_map = NumberMap() - if isinstance(source_columns, list): - renumber_map.from_dataframe(df, source_columns, dest_columns) + if isinstance(src_col_names, list): + renumber_map.from_dataframe(df, src_col_names, dst_col_names) df = renumber_map.add_internal_vertex_id( - df, "src", source_columns, drop=True, + df, "src", src_col_names, drop=True, preserve_order=preserve_order ) df = renumber_map.add_internal_vertex_id( - df, "dst", dest_columns, drop=True, + df, "dst", dst_col_names, drop=True, preserve_order=preserve_order ) else: - renumber_map.from_dataframe(df, [source_columns], [dest_columns]) + renumber_map.from_dataframe(df, [src_col_names], [dst_col_names]) df = renumber_map.add_internal_vertex_id( - df, "src", source_columns, drop=True, + df, "src", src_col_names, drop=True, preserve_order=preserve_order ) df = renumber_map.add_internal_vertex_id( - df, "dst", dest_columns, drop=True, + df, "dst", dst_col_names, drop=True, preserve_order=preserve_order )