Skip to content

Commit

Permalink
[REVIEW] update dask docs (#1223)
Browse files Browse the repository at this point in the history
* update dask docs

* changelog
  • Loading branch information
Iroy30 authored Oct 14, 2020
1 parent 715e374 commit d83cff7
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 29 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
- PR #1176 Update ci/local/README.md
- PR #1184 BLD getting latest tags
- PR #1217 NetworkX Transition doc
- PR #1223 Update mnmg docs


## Bug Fixes
- PR #1131 Show style checker errors with set +e
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/dask/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def louvain(input_graph, max_iter=100, resolution=1.0):
Examples
--------
>>> import cugraph.dask as dcg
>>> Comms.initialize()
>>> Comms.initialize(p2p=True)
>>> chunksize = dcg.get_chunksize(input_data_path)
>>> ddf = dask_cudf.read_csv('datasets/karate.csv', chunksize=chunksize,
delimiter=' ',
Expand Down
14 changes: 4 additions & 10 deletions python/cugraph/dask/link_analysis/pagerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ def pagerank(input_graph,
personalization=None,
max_iter=100,
tol=1.0e-5,
nstart=None,
load_balance=True):
nstart=None):

"""
Find the PageRank values for each vertex in a graph using multiple GPUs.
Expand Down Expand Up @@ -92,26 +91,21 @@ def pagerank(input_graph,
acceptable.
nstart : not supported
initial guess for pagerank
load_balance : bool
Set as True to perform load_balancing after global sorting of
dask-cudf DataFrame. This ensures that the data is uniformly
distributed among multiple GPUs to avoid over-loading.
Returns
-------
PageRank : dask_cudf.DataFrame
GPU data frame containing two dask_cudf.Series of size V: the
vertex identifiers and the corresponding PageRank values.
ddf['vertex'] : cudf.Series
ddf['vertex'] : dask_cudf.Series
Contains the vertex identifiers
ddf['pagerank'] : cudf.Series
ddf['pagerank'] : dask_cudf.Series
Contains the PageRank score
Examples
--------
>>> import cugraph.dask as dcg
>>> Comms.initialize()
>>> Comms.initialize(p2p=True)
>>> chunksize = dcg.get_chunksize(input_data_path)
>>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
delimiter=' ',
Expand Down
12 changes: 6 additions & 6 deletions python/cugraph/dask/traversal/bfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,26 +64,26 @@ def bfs(graph,
Returns
-------
df : cudf.DataFrame
df['vertex'][i] gives the vertex id of the i'th vertex
df : dask_cudf.DataFrame
df['vertex'] gives the vertex id
df['distance'][i] gives the path distance for the i'th vertex from the
df['distance'] gives the path distance from the
starting vertex (Only if return_distances is True)
df['predecessor'][i] gives for the i'th vertex the vertex it was
df['predecessor'] gives, for each vertex, the vertex it was
reached from in the traversal
Examples
--------
>>> import cugraph.dask as dcg
>>> Comms.initialize()
>>> Comms.initialize(p2p=True)
>>> chunksize = dcg.get_chunksize(input_data_path)
>>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
delimiter=' ',
names=['src', 'dst', 'value'],
dtype=['int32', 'int32', 'float32'])
>>> dg = cugraph.DiGraph()
>>> dg.from_dask_cudf_edgelist(ddf)
>>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst')
>>> df = dcg.bfs(dg, 0)
>>> Comms.destroy()
"""
Expand Down
29 changes: 17 additions & 12 deletions python/cugraph/dask/traversal/sssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,42 +43,47 @@ def sssp(graph,
source):

"""
Find the distances and predecessors for a breadth first traversal of a
graph.
The input graph must contain edge list as dask-cudf dataframe with
Compute the distances and predecessors for shortest paths from the
specified source to all the vertices in the graph. The 'distance' column
will store the distance from the source to each vertex. The 'predecessor'
column will store each vertex's predecessor in the shortest path. Vertices
that are unreachable will have a distance of infinity, denoted by the
maximum value of the data type, and the predecessor set to -1. The source
vertex's predecessor is also set to -1.
The input graph must contain its edge list as a dask-cudf dataframe with
one partition per GPU.
Parameters
----------
graph : cugraph.DiGraph
cuGraph graph descriptor, should contain the connectivity information
as dask cudf edge list dataframe(edge weights are not used for this
algorithm). Undirected Graph not currently supported.
as a dask-cudf edge list dataframe.
Undirected graphs are not currently supported.
source : Integer
Specify source vertex
Returns
-------
df : cudf.DataFrame
df['vertex'][i] gives the vertex id of the i'th vertex
df : dask_cudf.DataFrame
df['vertex'] gives the vertex id
df['distance'][i] gives the path distance for the i'th vertex from the
starting vertex (Only if return_distances is True)
df['distance'] gives the path distance from the
starting vertex
df['predecessor'][i] gives for the i'th vertex the vertex it was
df['predecessor'] gives, for each vertex, the id of the vertex it was
reached from in the traversal
Examples
--------
>>> import cugraph.dask as dcg
>>> Comms.initialize()
>>> Comms.initialize(p2p=True)
>>> chunksize = dcg.get_chunksize(input_data_path)
>>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
delimiter=' ',
names=['src', 'dst', 'value'],
dtype=['int32', 'int32', 'float32'])
>>> dg = cugraph.DiGraph()
>>> dg.from_dask_cudf_edgelist(ddf)
>>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst')
>>> df = dcg.sssp(dg, 0)
>>> Comms.destroy()
"""
Expand Down

0 comments on commit d83cff7

Please sign in to comment.