Skip to content

Commit

Permalink
Merge pull request #1044 from rapidsai/branch-0.15
Browse files Browse the repository at this point in the history
[gpuCI] Auto-merge branch-0.15 to branch-0.16 [skip ci]
  • Loading branch information
GPUtester authored Aug 7, 2020
2 parents 8de1e40 + cc76db2 commit 1962012
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 35 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@
- PR #1025: Explicitly skip raft test folder for pytest 6.0.0
- PR #1027 Fix documentation
- PR #1033 Fix repartition error in big datasets, updated coroutine, fixed warnings
- PR #1036 Fixed benchmarks for new renumbering API, updated comments, added quick test-only benchmark run to CI
- PR #1040 Fix spectral clustering renumbering issue

# cuGraph 0.14.0 (03 Jun 2020)

Expand Down
5 changes: 2 additions & 3 deletions benchmarks/bench_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def setFixtureParamNames(*args, **kwargs):
pass

import cugraph
from cugraph.structure.number_map import NumberMap
from cugraph.tests import utils
import rmm

Expand Down Expand Up @@ -174,9 +175,7 @@ def bench_create_digraph(gpubenchmark, edgelistCreated):

@pytest.mark.ETL
def bench_renumber(gpubenchmark, edgelistCreated):
gpubenchmark(cugraph.renumber,
edgelistCreated["0"], # src
edgelistCreated["1"]) # dst
gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1")


def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed):
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def genFixtureParamsProduct(*args):
# https://docs.rapids.ai/maintainers/datasets
# FIXME: rlr: soc-twitter-2010.csv crashes with OOM error on my RTX-8000
UNDIRECTED_DATASETS = [
pytest.param("../datasets/karate.csv",
marks=[pytest.mark.tiny, pytest.mark.undirected]),
pytest.param("../datasets/csv/undirected/hollywood.csv",
marks=[pytest.mark.small, pytest.mark.undirected]),
pytest.param("../datasets/csv/undirected/europe_osm.csv",
Expand Down
1 change: 1 addition & 0 deletions benchmarks/pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ markers =
poolallocator_off: RMM pool allocator disabled
ETL: benchmarks for ETL steps
small: small datasets
tiny: tiny datasets
directed: directed datasets
undirected: undirected datasets

Expand Down
5 changes: 3 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul
"dask-cuda=${MINOR_VERSION}" \
"ucx-py=${MINOR_VERSION}" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*"
"rapids-notebook-env=$MINOR_VERSION.*" \
rapids-pytest-benchmark

# https://docs.rapids.ai/maintainers/depmgmt/
# https://docs.rapids.ai/maintainers/depmgmt/
# conda remove --force rapids-build-env rapids-notebook-env
# conda install "your-pkg=1.0.0"

Expand Down
5 changes: 5 additions & 0 deletions ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,9 @@ cd ${CUGRAPH_ROOT}/python
pytest --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph.xml -v --cov-config=.coveragerc --cov=cugraph --cov-report=xml:${WORKSPACE}/python/cugraph/cugraph-coverage.xml --cov-report term --ignore=cugraph/raft
ERRORCODE=$((ERRORCODE | $?))

echo "Python benchmarks for cuGraph (running as tests)..."
cd ${CUGRAPH_ROOT}/benchmarks
pytest -v -m "managedmem_on and poolallocator_on and tiny" --benchmark-disable
ERRORCODE=$((ERRORCODE | $?))

exit ${ERRORCODE}
6 changes: 5 additions & 1 deletion python/cugraph/community/spectral_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ def spectralBalancedCutClustering(
)

if G.renumbered:
df = G.unrenumber(df, "vertex")
# FIXME: This is a hack to get around an
# API problem. The spectral API assumes that
# the data frame remains in internal vertex
# id order. It should not do that.
df = G.unrenumber(df, "vertex", preserve_order=True)

return df

Expand Down
6 changes: 5 additions & 1 deletion python/cugraph/layout/force_atlas2.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ def on_train_end(self, positions):
verbose=verbose,
callback=callback,
)

# If the caller passed in a pos_list, those values are already mapped to
# original numbering in the call to force_atlas2_wrapper.force_atlas2(),
# but if the caller did not specify a pos_list and the graph was
# renumbered, the pos dataframe should be mapped back to the original
# numbering.
if pos_list is None and input_graph.renumbered:
pos = input_graph.unrenumber(pos, "vertex")

Expand Down
18 changes: 2 additions & 16 deletions python/cugraph/structure/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ def to_directed(self):
if type(self) is Graph:
DiG = DiGraph()
DiG.renumbered = self.renumbered
DiG.renumber_map = self.renumber_map
DiG.edgelist = self.edgelist
DiG.adjlist = self.adjlist
DiG.transposedadjlist = self.transposedadjlist
Expand Down Expand Up @@ -964,6 +965,7 @@ def to_undirected(self):
G = Graph()
df = self.edgelist.edgelist_df
G.renumbered = self.renumbered
G.renumber_map = self.renumber_map
if self.edgelist.weights:
source_col, dest_col, value_col = symmetrize(
df["src"], df["dst"], df["weights"]
Expand Down Expand Up @@ -1103,22 +1105,6 @@ def unrenumber(self, df, column_name, preserve_order=False):
The original DataFrame columns exist unmodified. The external
vertex identifiers are added to the DataFrame, the internal
vertex identifier column is removed from the dataframe.
Examples
--------
>>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
>>> dtype=['int32', 'int32', 'float32'], header=None)
>>>
>>> df, number_map = NumberMap.renumber(df, '0', '1')
>>>
>>> G = cugraph.Graph()
>>> G.from_cudf_edgelist(df, 'src', 'dst')
>>>
>>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500,
>>> tol = 1.0e-05)
>>>
>>> pr = number_map.unrenumber(pr, 'vertex')
>>>
"""
return self.renumber_map.unrenumber(df, column_name, preserve_order)

Expand Down
24 changes: 12 additions & 12 deletions python/cugraph/structure/number_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,10 +669,10 @@ def column_names(self):
"""
return self.implementation.col_names

def renumber(df, source_columns, dest_columns, preserve_order=False):
def renumber(df, src_col_names, dst_col_names, preserve_order=False):
"""
Given a single GPU or distributed DataFrame, use source_columns and
dest_columns to identify the source vertex identifiers and destination
Given a single GPU or distributed DataFrame, use src_col_names and
dst_col_names to identify the source vertex identifiers and destination
vertex identifiers, respectively.
Internal vertex identifiers will be created, numbering vertices as
Expand All @@ -694,11 +694,11 @@ def renumber(df, source_columns, dest_columns, preserve_order=False):
df: cudf.DataFrame or dask_cudf.DataFrame
Contains a list of external vertex identifiers that will be
numbered by the NumberMap class.
src_col_names: list of strings
src_col_names: string or list of strings
A string or list of 1 or more strings containing the names
of the columns that uniquely identify an external
vertex identifier for source vertices
dst_col_names: list of strings
dst_col_names: string or list of strings
A string or list of 1 or more strings containing the names
of the columns that uniquely identify an external
vertex identifier for destination vertices
Expand Down Expand Up @@ -729,25 +729,25 @@ def renumber(df, source_columns, dest_columns, preserve_order=False):
"""
renumber_map = NumberMap()

if isinstance(source_columns, list):
renumber_map.from_dataframe(df, source_columns, dest_columns)
if isinstance(src_col_names, list):
renumber_map.from_dataframe(df, src_col_names, dst_col_names)
df = renumber_map.add_internal_vertex_id(
df, "src", source_columns, drop=True,
df, "src", src_col_names, drop=True,
preserve_order=preserve_order
)
df = renumber_map.add_internal_vertex_id(
df, "dst", dest_columns, drop=True,
df, "dst", dst_col_names, drop=True,
preserve_order=preserve_order
)
else:
renumber_map.from_dataframe(df, [source_columns], [dest_columns])
renumber_map.from_dataframe(df, [src_col_names], [dst_col_names])
df = renumber_map.add_internal_vertex_id(
df, "src", source_columns, drop=True,
df, "src", src_col_names, drop=True,
preserve_order=preserve_order
)

df = renumber_map.add_internal_vertex_id(
df, "dst", dest_columns, drop=True,
df, "dst", dst_col_names, drop=True,
preserve_order=preserve_order
)

Expand Down

0 comments on commit 1962012

Please sign in to comment.