[REVIEW] update dask docs (#1223)

* update dask docs * changelog
rapidsai · Oct 14, 2020 · d83cff7 · d83cff7
1 parent 715e374
commit d83cff7
Show file tree

Hide file tree

Showing 5 changed files with 30 additions and 29 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -34,6 +34,8 @@
 - PR #1176 Update ci/local/README.md
 - PR #1184 BLD getting latest tags
 - PR #1217 NetworkX Transition doc
+- PR #1223 Update mnmg docs
+
 
 ## Bug Fixes
 - PR #1131 Show style checker errors with set +e

diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py
@@ -53,7 +53,7 @@ def louvain(input_graph, max_iter=100, resolution=1.0):
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize()
+    >>> Comms.initialize(p2p=True)
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv('datasets/karate.csv', chunksize=chunksize,
                                  delimiter=' ',

diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py
@@ -51,8 +51,7 @@ def pagerank(input_graph,
              personalization=None,
              max_iter=100,
              tol=1.0e-5,
-             nstart=None,
-             load_balance=True):
+             nstart=None):
 
     """
     Find the PageRank values for each vertex in a graph using multiple GPUs.
@@ -92,26 +91,21 @@ def pagerank(input_graph,
         acceptable.
     nstart : not supported
         initial guess for pagerank
-    load_balance : bool
-        Set as True to perform load_balancing after global sorting of
-        dask-cudf DataFrame. This ensures that the data is uniformly
-        distributed among multiple GPUs to avoid over-loading.
-
     Returns
     -------
     PageRank : dask_cudf.DataFrame
         GPU data frame containing two dask_cudf.Series of size V: the
         vertex identifiers and the corresponding PageRank values.
 
-        ddf['vertex'] : cudf.Series
+        ddf['vertex'] : dask_cudf.Series
             Contains the vertex identifiers
-        ddf['pagerank'] : cudf.Series
+        ddf['pagerank'] : dask_cudf.Series
             Contains the PageRank score
 
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize()
+    >>> Comms.initialize(p2p=True)
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                                  delimiter=' ',

diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py
@@ -64,26 +64,26 @@ def bfs(graph,
 
     Returns
     -------
-    df : cudf.DataFrame
-        df['vertex'][i] gives the vertex id of the i'th vertex
+    df : dask_cudf.DataFrame
+        df['vertex'] gives the vertex id
 
-        df['distance'][i] gives the path distance for the i'th vertex from the
+        df['distance'] gives the path distance from the
         starting vertex (Only if return_distances is True)
 
-        df['predecessor'][i] gives for the i'th vertex the vertex it was
+        df['predecessor'] gives, for each vertex, the vertex it was
         reached from in the traversal
 
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize()
+    >>> Comms.initialize(p2p=True)
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                                  delimiter=' ',
                                  names=['src', 'dst', 'value'],
                                  dtype=['int32', 'int32', 'float32'])
     >>> dg = cugraph.DiGraph()
-    >>> dg.from_dask_cudf_edgelist(ddf)
+    >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst')
     >>> df = dcg.bfs(dg, 0)
     >>> Comms.destroy()
     """

diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py
@@ -43,42 +43,47 @@ def sssp(graph,
          source):
 
     """
-    Find the distances and predecessors for a breadth first traversal of a
-    graph.
-    The input graph must contain edge list as  dask-cudf dataframe with
+    Compute the distance and predecessors for shortest paths from the specified
+    source to all the vertices in the graph. The distances column will store
+    the distance from the source to each vertex. The predecessors column will
+    store each vertex's predecessor in the shortest path. Vertices that are
+    unreachable will have a distance of infinity denoted by the maximum value
+    of the data type and the predecessor set as -1. The source vertex's
+    predecessor is also set to -1.
+    The input graph must contain an edge list as a dask-cudf dataframe with
     one partition per GPU.
 
     Parameters
     ----------
     graph : cugraph.DiGraph
         cuGraph graph descriptor, should contain the connectivity information
-        as dask cudf edge list dataframe(edge weights are not used for this
-        algorithm). Undirected Graph not currently supported.
+        as a dask-cudf edge list dataframe.
+        Undirected Graph not currently supported.
     source : Integer
         Specify source vertex
 
     Returns
     -------
-    df : cudf.DataFrame
-        df['vertex'][i] gives the vertex id of the i'th vertex
+    df : dask_cudf.DataFrame
+        df['vertex'] gives the vertex id
 
-        df['distance'][i] gives the path distance for the i'th vertex from the
-        starting vertex (Only if return_distances is True)
+        df['distance'] gives the path distance from the
+        starting vertex
 
-        df['predecessor'][i] gives for the i'th vertex the vertex it was
+        df['predecessor'] gives, for each vertex, the vertex id it was
         reached from in the traversal
 
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize()
+    >>> Comms.initialize(p2p=True)
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                                  delimiter=' ',
                                  names=['src', 'dst', 'value'],
                                  dtype=['int32', 'int32', 'float32'])
     >>> dg = cugraph.DiGraph()
-    >>> dg.from_dask_cudf_edgelist(ddf)
+    >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst')
     >>> df = dcg.sssp(dg, 0)
     >>> Comms.destroy()
     """