Skip to content

Commit

Permalink
Merge pull request #1044 from rapidsai/branch-0.15
Browse files Browse the repository at this point in the history
[gpuCI] Auto-merge branch-0.15 to branch-0.16 [skip ci]
  • Loading branch information
GPUtester authored Aug 7, 2020
2 parents 8de1e40 + cc76db2 commit 1962012
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 35 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@
- PR #1025: Explicitly skip raft test folder for pytest 6.0.0
- PR #1027 Fix documentation
- PR #1033 Fix repartition error in big datasets, updated coroutine, fixed warnings
- PR #1036 Fixed benchmarks for new renumbering API, updated comments, added quick test-only benchmark run to CI
- PR #1040 Fix spectral clustering renumbering issue

# cuGraph 0.14.0 (03 Jun 2020)

Expand Down
5 changes: 2 additions & 3 deletions benchmarks/bench_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def setFixtureParamNames(*args, **kwargs):
pass

import cugraph
from cugraph.structure.number_map import NumberMap
from cugraph.tests import utils
import rmm

Expand Down Expand Up @@ -174,9 +175,7 @@ def bench_create_digraph(gpubenchmark, edgelistCreated):

@pytest.mark.ETL
def bench_renumber(gpubenchmark, edgelistCreated):
gpubenchmark(cugraph.renumber,
edgelistCreated["0"], # src
edgelistCreated["1"]) # dst
gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1")


def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed):
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def genFixtureParamsProduct(*args):
# https://docs.rapids.ai/maintainers/datasets
# FIXME: rlr: soc-twitter-2010.csv crashes with OOM error on my RTX-8000
UNDIRECTED_DATASETS = [
pytest.param("../datasets/karate.csv",
marks=[pytest.mark.tiny, pytest.mark.undirected]),
pytest.param("../datasets/csv/undirected/hollywood.csv",
marks=[pytest.mark.small, pytest.mark.undirected]),
pytest.param("../datasets/csv/undirected/europe_osm.csv",
Expand Down
1 change: 1 addition & 0 deletions benchmarks/pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ markers =
poolallocator_off: RMM pool allocator disabled
ETL: benchmarks for ETL steps
small: small datasets
tiny: tiny datasets
directed: directed datasets
undirected: undirected datasets

Expand Down
5 changes: 3 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul
"dask-cuda=${MINOR_VERSION}" \
"ucx-py=${MINOR_VERSION}" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*"
"rapids-notebook-env=$MINOR_VERSION.*" \
rapids-pytest-benchmark

# https://docs.rapids.ai/maintainers/depmgmt/
# https://docs.rapids.ai/maintainers/depmgmt/
# conda remove --force rapids-build-env rapids-notebook-env
# conda install "your-pkg=1.0.0"

Expand Down
5 changes: 5 additions & 0 deletions ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,9 @@ cd ${CUGRAPH_ROOT}/python
pytest --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph.xml -v --cov-config=.coveragerc --cov=cugraph --cov-report=xml:${WORKSPACE}/python/cugraph/cugraph-coverage.xml --cov-report term --ignore=cugraph/raft
ERRORCODE=$((ERRORCODE | $?))

echo "Python benchmarks for cuGraph (running as tests)..."
cd ${CUGRAPH_ROOT}/benchmarks
pytest -v -m "managedmem_on and poolallocator_on and tiny" --benchmark-disable
ERRORCODE=$((ERRORCODE | $?))

exit ${ERRORCODE}
6 changes: 5 additions & 1 deletion python/cugraph/community/spectral_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ def spectralBalancedCutClustering(
)

if G.renumbered:
df = G.unrenumber(df, "vertex")
# FIXME: This is a hack to get around an
# API problem. The spectral API assumes that
# the data frame remains in internal vertex
# id order. It should not do that.
df = G.unrenumber(df, "vertex", preserve_order=True)

return df

Expand Down
6 changes: 5 additions & 1 deletion python/cugraph/layout/force_atlas2.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ def on_train_end(self, positions):
verbose=verbose,
callback=callback,
)

# If the caller passed in a pos_list, those values are already mapped to
# original numbering in the call to force_atlas2_wrapper.force_atlas2(),
# but if the caller did not specify a pos_list and the graph was
# renumbered, the pos dataframe should be mapped back to the original
# numbering.
if pos_list is None and input_graph.renumbered:
pos = input_graph.unrenumber(pos, "vertex")

Expand Down
18 changes: 2 additions & 16 deletions python/cugraph/structure/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ def to_directed(self):
if type(self) is Graph:
DiG = DiGraph()
DiG.renumbered = self.renumbered
DiG.renumber_map = self.renumber_map
DiG.edgelist = self.edgelist
DiG.adjlist = self.adjlist
DiG.transposedadjlist = self.transposedadjlist
Expand Down Expand Up @@ -964,6 +965,7 @@ def to_undirected(self):
G = Graph()
df = self.edgelist.edgelist_df
G.renumbered = self.renumbered
G.renumber_map = self.renumber_map
if self.edgelist.weights:
source_col, dest_col, value_col = symmetrize(
df["src"], df["dst"], df["weights"]
Expand Down Expand Up @@ -1103,22 +1105,6 @@ def unrenumber(self, df, column_name, preserve_order=False):
The original DataFrame columns exist unmodified. The external
vertex identifiers are added to the DataFrame, the internal
vertex identifier column is removed from the dataframe.
Examples
--------
>>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
>>> dtype=['int32', 'int32', 'float32'], header=None)
>>>
>>> df, number_map = NumberMap.renumber(df, '0', '1')
>>>
>>> G = cugraph.Graph()
>>> G.from_cudf_edgelist(df, 'src', 'dst')
>>>
>>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500,
>>> tol = 1.0e-05)
>>>
>>> pr = number_map.unrenumber(pr, 'vertex')
>>>
"""
return self.renumber_map.unrenumber(df, column_name, preserve_order)

Expand Down
24 changes: 12 additions & 12 deletions python/cugraph/structure/number_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,10 +669,10 @@ def column_names(self):
"""
return self.implementation.col_names

def renumber(df, source_columns, dest_columns, preserve_order=False):
def renumber(df, src_col_names, dst_col_names, preserve_order=False):
"""
Given a single GPU or distributed DataFrame, use source_columns and
dest_columns to identify the source vertex identifiers and destination
Given a single GPU or distributed DataFrame, use src_col_names and
dst_col_names to identify the source vertex identifiers and destination
vertex identifiers, respectively.
Internal vertex identifiers will be created, numbering vertices as
Expand All @@ -694,11 +694,11 @@ def renumber(df, source_columns, dest_columns, preserve_order=False):
df: cudf.DataFrame or dask_cudf.DataFrame
Contains a list of external vertex identifiers that will be
numbered by the NumberMap class.
src_col_names: list of strings
src_col_names: string or list of strings
A string or list of 1 or more strings containing the names
of the columns that uniquely identify an external
vertex identifier for source vertices
dst_col_names: list of strings
dst_col_names: string or list of strings
A string or list of 1 or more strings containing the names
of the columns that uniquely identify an external
vertex identifier for destination vertices
Expand Down Expand Up @@ -729,25 +729,25 @@ def renumber(df, source_columns, dest_columns, preserve_order=False):
"""
renumber_map = NumberMap()

if isinstance(source_columns, list):
renumber_map.from_dataframe(df, source_columns, dest_columns)
if isinstance(src_col_names, list):
renumber_map.from_dataframe(df, src_col_names, dst_col_names)
df = renumber_map.add_internal_vertex_id(
df, "src", source_columns, drop=True,
df, "src", src_col_names, drop=True,
preserve_order=preserve_order
)
df = renumber_map.add_internal_vertex_id(
df, "dst", dest_columns, drop=True,
df, "dst", dst_col_names, drop=True,
preserve_order=preserve_order
)
else:
renumber_map.from_dataframe(df, [source_columns], [dest_columns])
renumber_map.from_dataframe(df, [src_col_names], [dst_col_names])
df = renumber_map.add_internal_vertex_id(
df, "src", source_columns, drop=True,
df, "src", src_col_names, drop=True,
preserve_order=preserve_order
)

df = renumber_map.add_internal_vertex_id(
df, "dst", dest_columns, drop=True,
df, "dst", dst_col_names, drop=True,
preserve_order=preserve_order
)

Expand Down

0 comments on commit 1962012

Please sign in to comment.