Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] FEA generic from_edgelist() and from_adjlist() APIs #1274

Merged
3 changes: 2 additions & 1 deletion python/cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from cugraph.structure import (
Graph,
DiGraph,
from_edgelist,
from_cudf_edgelist,
from_pandas_edgelist,
to_pandas_edgelist,
Expand Down Expand Up @@ -70,7 +71,7 @@

from cugraph.traversal import (
bfs,
bfs_edges,
bfs_edges,
sssp,
shortest_path,
filter_unreachable,
Expand Down
3 changes: 2 additions & 1 deletion python/cugraph/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
from cugraph.structure.graph import Graph, DiGraph
from cugraph.structure.number_map import NumberMap
from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf
from cugraph.structure.convert_matrix import (from_cudf_edgelist,
from cugraph.structure.convert_matrix import (from_edgelist,
from_cudf_edgelist,
from_pandas_edgelist,
to_pandas_edgelist,
from_pandas_adjacency,
Expand Down
55 changes: 54 additions & 1 deletion python/cugraph/structure/convert_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,62 @@
# issue #146 is addressed, this file's extension should be changed from .pyx to
# .py and should be located outside the python/cugraph/bindings directory.

import cudf

from cugraph.structure.graph import DiGraph, Graph

# optional dependencies used for handling different input types
try:
import pandas as pd
except ModuleNotFoundError:
pd = None


def from_edgelist(df, source='source', destination='destination',
                  edge_attr=None, create_using=Graph, renumber=True):
    """
    Return a new graph created from the edge list representation.

    This is a generic entry point: it inspects the type of ``df`` and
    forwards all arguments unchanged to ``from_cudf_edgelist`` or
    ``from_pandas_edgelist``.

    Parameters
    ----------
    df : cudf.DataFrame, pandas.DataFrame
        This DataFrame contains columns storing edge source vertices,
        destination (or target following NetworkX's terminology) vertices, and
        (optional) weights.
    source : string or integer
        This is used to index the source column.
    destination : string or integer
        This is used to index the destination (or target following NetworkX's
        terminology) column.
    edge_attr : string or integer, optional
        This pointer can be ``None``. If not, this is used to index the weight
        column.
    create_using : cuGraph.Graph
        Specify the type of Graph to create. Default is cugraph.Graph
    renumber : bool
        If source and destination indices are not in range 0 to V where V
        is number of vertices, renumber argument should be True.

    Returns
    -------
    The graph object returned by the type-specific conversion function.

    Raises
    ------
    TypeError
        If ``df`` is not a cudf.DataFrame or a pandas.DataFrame. A pandas
        DataFrame can only be recognized when pandas is installed.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_edgelist(M, source='0', destination='1',
    ...                           edge_attr='2')
    """
    # isinstance (rather than an exact type match) so that DataFrame
    # subclasses are dispatched like their base class.
    if isinstance(df, cudf.DataFrame):
        return from_cudf_edgelist(df, source, destination,
                                  edge_attr, create_using, renumber)

    # pd is None when the optional pandas dependency is not installed.
    if pd is not None and isinstance(df, pd.DataFrame):
        return from_pandas_edgelist(df, source, destination,
                                    edge_attr, create_using, renumber)

    raise TypeError(f"obj of type {type(df)} is not supported.")


def from_cudf_edgelist(df, source='source', destination='destination',
edge_attr=None, create_using=Graph, renumber=True):
Expand Down Expand Up @@ -52,7 +106,6 @@ def from_cudf_edgelist(df, source='source', destination='destination',
>>> dtype=['int32', 'int32', 'float32'], header=None)
>>> G = cugraph.Graph()
>>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
...                                edge_attr='2')

"""
if create_using is Graph:
G = Graph()
Expand Down
60 changes: 22 additions & 38 deletions python/cugraph/tests/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@
import networkx as nx


# =============================================================================
# Pytest Setup / Teardown - called for each test function
# =============================================================================
def setup_function():
    """Run a full garbage collection as the per-test setup step."""
    gc.collect()


def compare_series(series_1, series_2):
assert len(series_1) == len(series_2)
df = cudf.DataFrame({"series_1": series_1, "series_2": series_2})
Expand Down Expand Up @@ -151,15 +158,12 @@ def check_all_two_hops(df, M):


def test_version():
    """Smoke test: the cugraph package exposes a ``__version__`` attribute."""
    # No explicit gc.collect() here; the module-level setup_function() hook
    # already collects, keeping this test consistent with the others.
    # The bare attribute access raises AttributeError if it is missing.
    cugraph.__version__


# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_add_edge_list_to_adj_list(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)

M = utils.read_csv_for_nx(graph_file)
Expand All @@ -180,8 +184,6 @@ def test_add_edge_list_to_adj_list(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_add_adj_list_to_edge_list(graph_file):
gc.collect()

Mnx = utils.read_csv_for_nx(graph_file)
N = max(max(Mnx["0"]), max(Mnx["1"])) + 1
Mcsr = scipy.sparse.csr_matrix(
Expand All @@ -208,8 +210,6 @@ def test_add_adj_list_to_edge_list(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_view_edge_list_from_adj_list(graph_file):
gc.collect()

Mnx = utils.read_csv_for_nx(graph_file)
N = max(max(Mnx["0"]), max(Mnx["1"])) + 1
Mcsr = scipy.sparse.csr_matrix(
Expand All @@ -231,8 +231,6 @@ def test_view_edge_list_from_adj_list(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_delete_edge_list_delete_adj_list(graph_file):
gc.collect()

Mnx = utils.read_csv_for_nx(graph_file)
df = cudf.DataFrame()
df["src"] = cudf.Series(Mnx["0"])
Expand Down Expand Up @@ -261,8 +259,6 @@ def test_delete_edge_list_delete_adj_list(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
gc.collect()

Mnx = utils.read_csv_for_nx(graph_file)
df = cudf.DataFrame()
df["src"] = cudf.Series(Mnx["0"])
Expand Down Expand Up @@ -302,8 +298,6 @@ def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_edges_for_Graph(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)

# Create nx Graph
Expand Down Expand Up @@ -342,8 +336,6 @@ def test_edges_for_Graph(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_view_edge_list_for_Graph(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)

# Create nx Graph
Expand Down Expand Up @@ -387,8 +379,6 @@ def test_view_edge_list_for_Graph(graph_file):
# Test
@pytest.mark.parametrize('graph_file', utils.DATASETS)
def test_consolidation(graph_file):
gc.collect()

cluster = LocalCUDACluster()
client = Client(cluster)
chunksize = dcg.get_chunksize(graph_file)
Expand Down Expand Up @@ -423,8 +413,6 @@ def test_consolidation(graph_file):
# Test
@pytest.mark.parametrize('graph_file', utils.DATASETS_SMALL)
def test_two_hop_neighbors(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)

G = cugraph.DiGraph()
Expand All @@ -444,8 +432,6 @@ def test_two_hop_neighbors(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_degree_functionality(graph_file):
gc.collect()

M = utils.read_csv_for_nx(graph_file)
cu_M = utils.read_csv_file(graph_file)

Expand Down Expand Up @@ -484,8 +470,6 @@ def test_degree_functionality(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_degrees_functionality(graph_file):
gc.collect()

M = utils.read_csv_for_nx(graph_file)
cu_M = utils.read_csv_file(graph_file)

Expand Down Expand Up @@ -517,8 +501,6 @@ def test_degrees_functionality(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_number_of_vertices(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)

M = utils.read_csv_for_nx(graph_file)
Expand All @@ -537,8 +519,6 @@ def test_number_of_vertices(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
def test_to_directed(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)
cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
M = utils.read_csv_for_nx(graph_file)
Expand Down Expand Up @@ -566,8 +546,6 @@ def test_to_directed(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
def test_to_undirected(graph_file):
gc.collect()

# Read data and then convert to directed by dropping some edges
cu_M = utils.read_csv_file(graph_file)
cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
Expand Down Expand Up @@ -602,8 +580,6 @@ def test_to_undirected(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_has_edge(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)
cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)

Expand All @@ -619,8 +595,6 @@ def test_has_edge(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_has_node(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)
nodes = cudf.concat([cu_M["0"], cu_M["1"]]).unique()

Expand All @@ -632,13 +606,10 @@ def test_has_node(graph_file):
assert G.has_node(n)


# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.parametrize('graph_file', utils.DATASETS)
def test_bipartite_api(graph_file):
# This test only tests the functionality of adding set of nodes and
# retrieving them. The datasets currently used are not truly bipartite.
gc.collect()

cu_M = utils.read_csv_file(graph_file)
nodes = cudf.concat([cu_M['0'], cu_M['1']]).unique()

Expand Down Expand Up @@ -670,8 +641,6 @@ def test_bipartite_api(graph_file):
# Test
@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_neighbors(graph_file):
gc.collect()

cu_M = utils.read_csv_file(graph_file)
nodes = cudf.concat([cu_M["0"], cu_M["1"]]).unique()
M = utils.read_csv_for_nx(graph_file)
Expand All @@ -687,3 +656,18 @@ def test_neighbors(graph_file):
cu_neighbors.sort()
nx_neighbors.sort()
assert cu_neighbors == nx_neighbors


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_from_edgelist(graph_file):
    """
    Build one graph from a cudf DataFrame and one from a pandas DataFrame
    via cugraph.from_edgelist(), both read from the same dataset, and check
    that their EdgeList attributes compare equal.
    """
    cudf_edges = utils.read_csv_file(graph_file)
    pandas_edges = utils.read_csv_for_nx(graph_file)

    graph_from_cudf = cugraph.from_edgelist(
        cudf_edges, source="0", destination="1"
    )
    graph_from_pandas = cugraph.from_edgelist(
        pandas_edges, source="0", destination="1"
    )

    assert graph_from_cudf.EdgeList == graph_from_pandas.EdgeList