Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gpuCI] Auto-merge branch-0.15 to branch-0.16 [skip ci] #1044

Merged
merged 11 commits into branch-0.16 from branch-0.15
Aug 7, 2020
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@
- PR #1025: Explicitly skip raft test folder for pytest 6.0.0
- PR #1027 Fix documentation
- PR #1033 Fix repartition error in big datasets, updated coroutine, fixed warnings
- PR #1036 Fixed benchmarks for new renumbering API, updated comments, added quick test-only benchmark run to CI
- PR #1040 Fix spectral clustering renumbering issue

# cuGraph 0.14.0 (03 Jun 2020)

Expand Down
5 changes: 2 additions & 3 deletions benchmarks/bench_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def setFixtureParamNames(*args, **kwargs):
pass

import cugraph
from cugraph.structure.number_map import NumberMap
from cugraph.tests import utils
import rmm

Expand Down Expand Up @@ -174,9 +175,7 @@ def bench_create_digraph(gpubenchmark, edgelistCreated):

@pytest.mark.ETL
def bench_renumber(gpubenchmark, edgelistCreated):
    # Benchmark the renumbering step in isolation using the new NumberMap
    # API: columns "0" and "1" of the edgelist are the source and
    # destination vertex identifiers, respectively.
    # (The old per-column cugraph.renumber(src, dst) call was replaced by
    # the single NumberMap.renumber(df, src_col, dst_col) entry point.)
    gpubenchmark(NumberMap.renumber, edgelistCreated, "0", "1")


def bench_pagerank(gpubenchmark, anyGraphWithTransposedAdjListComputed):
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def genFixtureParamsProduct(*args):
# https://docs.rapids.ai/maintainers/datasets
# FIXME: rlr: soc-twitter-2010.csv crashes with OOM error on my RTX-8000
UNDIRECTED_DATASETS = [
pytest.param("../datasets/karate.csv",
marks=[pytest.mark.tiny, pytest.mark.undirected]),
pytest.param("../datasets/csv/undirected/hollywood.csv",
marks=[pytest.mark.small, pytest.mark.undirected]),
pytest.param("../datasets/csv/undirected/europe_osm.csv",
Expand Down
1 change: 1 addition & 0 deletions benchmarks/pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ markers =
poolallocator_off: RMM pool allocator disabled
ETL: benchmarks for ETL steps
small: small datasets
tiny: tiny datasets
directed: directed datasets
undirected: undirected datasets

Expand Down
5 changes: 3 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul
"dask-cuda=${MINOR_VERSION}" \
"ucx-py=${MINOR_VERSION}" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*"
"rapids-notebook-env=$MINOR_VERSION.*" \
rapids-pytest-benchmark

# https://docs.rapids.ai/maintainers/depmgmt/
# https://docs.rapids.ai/maintainers/depmgmt/
# conda remove --force rapids-build-env rapids-notebook-env
# conda install "your-pkg=1.0.0"

Expand Down
5 changes: 5 additions & 0 deletions ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,9 @@ cd ${CUGRAPH_ROOT}/python
pytest --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph.xml -v --cov-config=.coveragerc --cov=cugraph --cov-report=xml:${WORKSPACE}/python/cugraph/cugraph-coverage.xml --cov-report term --ignore=cugraph/raft
ERRORCODE=$((ERRORCODE | $?))

echo "Python benchmarks for cuGraph (running as tests)..."
cd ${CUGRAPH_ROOT}/benchmarks
pytest -v -m "managedmem_on and poolallocator_on and tiny" --benchmark-disable
ERRORCODE=$((ERRORCODE | $?))

exit ${ERRORCODE}
6 changes: 5 additions & 1 deletion python/cugraph/community/spectral_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ def spectralBalancedCutClustering(
)

if G.renumbered:
df = G.unrenumber(df, "vertex")
# FIXME: This is a hack to get around an
# API problem. The spectral API assumes that
# the data frame remains in internal vertex
# id order. It should not do that.
df = G.unrenumber(df, "vertex", preserve_order=True)

return df

Expand Down
6 changes: 5 additions & 1 deletion python/cugraph/layout/force_atlas2.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ def on_train_end(self, positions):
verbose=verbose,
callback=callback,
)

# If the caller passed in a pos_list, those values are already mapped to
# original numbering in the call to force_atlas2_wrapper.force_atlas2(),
# but if the caller did not specify a pos_list and the graph was
# renumbered, the pos dataframe should be mapped back to the original
# numbering.
if pos_list is None and input_graph.renumbered:
pos = input_graph.unrenumber(pos, "vertex")

Expand Down
18 changes: 2 additions & 16 deletions python/cugraph/structure/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ def to_directed(self):
if type(self) is Graph:
DiG = DiGraph()
DiG.renumbered = self.renumbered
DiG.renumber_map = self.renumber_map
DiG.edgelist = self.edgelist
DiG.adjlist = self.adjlist
DiG.transposedadjlist = self.transposedadjlist
Expand Down Expand Up @@ -964,6 +965,7 @@ def to_undirected(self):
G = Graph()
df = self.edgelist.edgelist_df
G.renumbered = self.renumbered
G.renumber_map = self.renumber_map
if self.edgelist.weights:
source_col, dest_col, value_col = symmetrize(
df["src"], df["dst"], df["weights"]
Expand Down Expand Up @@ -1103,22 +1105,6 @@ def unrenumber(self, df, column_name, preserve_order=False):
The original DataFrame columns exist unmodified. The external
vertex identifiers are added to the DataFrame, the internal
vertex identifier column is removed from the dataframe.

Examples
--------
>>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
>>> dtype=['int32', 'int32', 'float32'], header=None)
>>>
>>> df, number_map = NumberMap.renumber(df, '0', '1')
>>>
>>> G = cugraph.Graph()
>>> G.from_cudf_edgelist(df, 'src', 'dst')
>>>
>>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500,
>>> tol = 1.0e-05)
>>>
>>> pr = number_map.unrenumber(pr, 'vertex')
>>>
"""
return self.renumber_map.unrenumber(df, column_name, preserve_order)

Expand Down
24 changes: 12 additions & 12 deletions python/cugraph/structure/number_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,10 +669,10 @@ def column_names(self):
"""
return self.implementation.col_names

def renumber(df, source_columns, dest_columns, preserve_order=False):
def renumber(df, src_col_names, dst_col_names, preserve_order=False):
"""
Given a single GPU or distributed DataFrame, use source_columns and
dest_columns to identify the source vertex identifiers and destination
Given a single GPU or distributed DataFrame, use src_col_names and
dst_col_names to identify the source vertex identifiers and destination
vertex identifiers, respectively.

Internal vertex identifiers will be created, numbering vertices as
Expand All @@ -694,11 +694,11 @@ def renumber(df, source_columns, dest_columns, preserve_order=False):
df: cudf.DataFrame or dask_cudf.DataFrame
Contains a list of external vertex identifiers that will be
numbered by the NumberMap class.
src_col_names: list of strings
src_col_names: string or list of strings
This string, or list of 1 or more strings, contains the names
of the column(s) that uniquely identify an external
vertex identifier for source vertices
dst_col_names: list of strings
dst_col_names: string or list of strings
This string, or list of 1 or more strings, contains the names
of the column(s) that uniquely identify an external
vertex identifier for destination vertices
Expand Down Expand Up @@ -729,25 +729,25 @@ def renumber(df, source_columns, dest_columns, preserve_order=False):
"""
renumber_map = NumberMap()

if isinstance(source_columns, list):
renumber_map.from_dataframe(df, source_columns, dest_columns)
if isinstance(src_col_names, list):
renumber_map.from_dataframe(df, src_col_names, dst_col_names)
df = renumber_map.add_internal_vertex_id(
df, "src", source_columns, drop=True,
df, "src", src_col_names, drop=True,
preserve_order=preserve_order
)
df = renumber_map.add_internal_vertex_id(
df, "dst", dest_columns, drop=True,
df, "dst", dst_col_names, drop=True,
preserve_order=preserve_order
)
else:
renumber_map.from_dataframe(df, [source_columns], [dest_columns])
renumber_map.from_dataframe(df, [src_col_names], [dst_col_names])
df = renumber_map.add_internal_vertex_id(
df, "src", source_columns, drop=True,
df, "src", src_col_names, drop=True,
preserve_order=preserve_order
)

df = renumber_map.add_internal_vertex_id(
df, "dst", dest_columns, drop=True,
df, "dst", dst_col_names, drop=True,
preserve_order=preserve_order
)

Expand Down