rapidsai · BradReesWork · Nov 24, 2020 · Nov 18, 2020 · Nov 19, 2020 · Nov 19, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,7 @@
 - PR #1260 Add katz_centrality mnmg wrapper
 - PR #1264 CuPy sparse matrix input support for WCC, SCC, SSSP, and BFS
 - PR #1265 Implement Hungarian Algorithm
+- PR #1274 Add generic from_edgelist() and from_adjlist() APIs
 
 ## Improvements
 - PR #1227 Pin cmake policies to cmake 3.17 version

@@ -29,6 +29,7 @@
 from cugraph.structure import (
     Graph,
     DiGraph,
+    from_edgelist,
     from_cudf_edgelist,
     from_pandas_edgelist,
     to_pandas_edgelist,
@@ -38,6 +39,7 @@
     to_numpy_array,
     from_numpy_matrix,
     to_numpy_matrix,
+    from_adjlist,
     hypergraph,
     symmetrize,
     symmetrize_df,
@@ -70,7 +72,7 @@
 
 from cugraph.traversal import (
     bfs,
-    bfs_edges, 
+    bfs_edges,
     sssp,
     shortest_path,
     filter_unreachable,

@@ -14,14 +14,16 @@
 from cugraph.structure.graph import Graph, DiGraph
 from cugraph.structure.number_map import NumberMap
 from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf
-from cugraph.structure.convert_matrix import (from_cudf_edgelist,
+from cugraph.structure.convert_matrix import (from_edgelist,
+                                              from_cudf_edgelist,
                                               from_pandas_edgelist,
                                               to_pandas_edgelist,
                                               from_pandas_adjacency,
                                               to_pandas_adjacency,
                                               from_numpy_array,
                                               to_numpy_array,
                                               from_numpy_matrix,
-                                              to_numpy_matrix)
+                                              to_numpy_matrix,
+                                              from_adjlist)
 from cugraph.structure.hypergraph import hypergraph
 from cugraph.structure.shuffle import shuffle
@@ -15,8 +15,133 @@
 # issue #146 is addressed, this file's extension should be changed from .pyx to
 # .py and should be located outside the python/cugraph/bindings directory.
 
+import cudf
+import dask_cudf
+
 from cugraph.structure.graph import DiGraph, Graph
 
+# optional dependencies used for handling different input types
+try:
+    import pandas as pd
+except ModuleNotFoundError:
+    pd = None
+
+
+def from_edgelist(df, source='source', destination='destination',
+                  edge_attr=None, create_using=Graph, renumber=True):
+    """
+    Return a new graph created from the edge list representation.
+
+    Parameters
+    ----------
+    df : cudf.DataFrame, pandas.DataFrame, dask_cudf.core.DataFrame
+        This DataFrame contains columns storing edge source vertices,
+        destination (or target following NetworkX's terminology) vertices, and
+        (optional) weights.
+    source : string or integer
+        This is used to index the source column.
+    destination : string or integer
+        This is used to index the destination (or target following NetworkX's
+        terminology) column.
+    edge_attr : string or integer, optional
+        This parameter can be ``None``. If not, this is used to index the
+        weight column.
+    create_using : cuGraph.Graph
+        Specify the type of Graph to create.  Default is cugraph.Graph
+    renumber : bool
+        If source and destination indices are not in range 0 to V where V
+        is the number of vertices, the renumber argument should be True.
+
+    Examples
+    --------
+    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
+    ...                   dtype=['int32', 'int32', 'float32'], header=None)
+    >>> G = cugraph.Graph()
+    >>> G = cugraph.from_edgelist(M, source='0', destination='1',
+    ...                           edge_attr='2')
+    """
+    df_type = type(df)
+
+    if df_type is cudf.DataFrame:
+        return from_cudf_edgelist(df, source, destination,
+                                  edge_attr, create_using, renumber)
+
+    elif (pd is not None) and (df_type is pd.DataFrame):
+        return from_pandas_edgelist(df, source, destination,
+                                    edge_attr, create_using, renumber)
+
+    elif df_type is dask_cudf.core.DataFrame:
+        if create_using in [Graph, DiGraph]:
+            G = create_using()
+        else:
+            raise TypeError(f"'create_using' is type {create_using}, must be "
+                            "either a cugraph.Graph or cugraph.DiGraph")
+        G.from_dask_cudf_edgelist(df, source, destination, edge_attr, renumber)
+        return G
+
+    else:
+        raise TypeError(f"obj of type {df_type} is not supported.")
+
+
+def from_adjlist(offsets, indices, values=None, create_using=Graph):
+    """
+    Initializes the graph from cuDF or Pandas Series representing adjacency
+    matrix CSR data and returns a new cugraph.Graph object if 'create_using' is
+    set to cugraph.Graph (the default), or cugraph.DiGraph if 'create_using' is
+    set to cugraph.DiGraph.
+
+    Parameters
+    ----------
+    offsets : cudf.Series, pandas.Series
+        The offsets of a CSR adjacency matrix.
+    indices : cudf.Series, pandas.Series
+        The indices of a CSR adjacency matrix.
+    values : cudf.Series, pandas.Series, or None (default), optional
+        The values in a CSR adjacency matrix, which represent edge weights in a
+        graph. If not provided, the resulting graph is considered unweighted.
+    create_using : cuGraph.Graph
+        Specify the type of Graph to create.  Default is cugraph.Graph
+
+    Examples
+    --------
+    >>> pdf = pd.read_csv('datasets/karate.csv', delimiter=' ',
+    ...                   dtype={0:'int32', 1:'int32', 2:'float32'},
+    ...                   header=None)
+    >>> M = scipy.sparse.coo_matrix((pdf[2],(pdf[0],pdf[1])))
+    >>> M = M.tocsr()
+    >>> offsets = pd.Series(M.indptr)
+    >>> indices = pd.Series(M.indices)
+    >>> G = cugraph.from_adjlist(offsets, indices, None)
+    """
+    offsets_type = type(offsets)
+    indices_type = type(indices)
+    if offsets_type != indices_type:
+        raise TypeError(f"'offsets' type {offsets_type} != 'indices' "
+                        f"type {indices_type}")
+    if values is not None:
+        values_type = type(values)
+        if values_type != offsets_type:
+            raise TypeError(f"'values' type {values_type} != 'offsets' "
+                            f"type {offsets_type}")
+
+    if create_using in [Graph, DiGraph]:
+        G = create_using()
+    else:
+        raise TypeError(f"'create_using' is type {create_using}, must be "
+                        "either a cugraph.Graph or cugraph.DiGraph")
+
+    if offsets_type is cudf.Series:
+        G.from_cudf_adjlist(offsets, indices, values)
+
+    elif (pd is not None) and (offsets_type is pd.Series):
+        G.from_cudf_adjlist(cudf.Series(offsets), cudf.Series(indices),
+                            None if values is None else cudf.Series(values))
+
+    else:
+        raise TypeError(f"obj of type {offsets_type} is not supported.")
+
+    return G
+
 
 def from_cudf_edgelist(df, source='source', destination='destination',
                        edge_attr=None, create_using=Graph, renumber=True):
@@ -52,7 +177,6 @@ def from_cudf_edgelist(df, source='source', destination='destination',
     >>>                   dtype=['int32', 'int32', 'float32'], header=None)
     >>> G = cugraph.Graph()
     >>> G = cugraph.from_cudf_edgelist(M, source='0', target='1', weight='2')
-
     """
     if create_using is Graph:
         G = Graph()

@@ -539,7 +539,6 @@ def to_numpy_matrix(self):
         """
         Returns the graph adjacency matrix as a NumPy matrix.
         """
-
         np_array = self.to_numpy_array()
         return np.asmatrix(np_array)
 

@@ -28,6 +28,13 @@
 from cugraph.tests import utils
 
 
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
 @pytest.fixture
 def client_connection():
     cluster = LocalCUDACluster()
@@ -44,9 +51,33 @@ def client_connection():
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-def test_compute_local_data(client_connection):
+def test_from_edgelist(client_connection):
+    input_data_path = r"../datasets/karate.csv"
+    chunksize = dcg.get_chunksize(input_data_path)
+    ddf = dask_cudf.read_csv(
+        input_data_path,
+        chunksize=chunksize,
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
 
-    gc.collect()
+    dg1 = cugraph.from_edgelist(
+        ddf, source="src", destination="dst", edge_attr="value",
+        create_using=cugraph.DiGraph)
+
+    dg2 = cugraph.DiGraph()
+    dg2.from_dask_cudf_edgelist(
+        ddf, source="src", destination="dst", edge_attr="value"
+    )
+
+    assert dg1.EdgeList == dg2.EdgeList
+
+
+@pytest.mark.skipif(
+    is_single_gpu(), reason="skipping MG testing on Single GPU system"
+)
+def test_compute_local_data(client_connection):
 
     input_data_path = r"../datasets/karate.csv"
     chunksize = dcg.get_chunksize(input_data_path)

@@ -29,10 +29,15 @@
     import networkx as nx
 
 
-@pytest.mark.parametrize("graph_file", utils.DATASETS)
-def test_to_from_pandas(graph_file):
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
     gc.collect()
 
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS)
+def test_to_from_pandas(graph_file):
     # Read in the graph
     M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
 
@@ -79,8 +84,6 @@ def test_to_from_pandas(graph_file):
 
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_from_to_numpy(graph_file):
-    gc.collect()
-
     # Read in the graph
     M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
 
@@ -145,3 +148,49 @@ def test_from_to_numpy(graph_file):
     res_pdf = res_pdf[['src', 'dst', 'weights']]
 
     assert exp_pdf.equals(res_pdf)
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS)
+def test_from_edgelist(graph_file):
+    """
+    Compare the resulting Graph objs from cugraph.from_edgelist() calls of both
+    a cudf and pandas DataFrame and ensure the results are equal.
+    """
+    df = utils.read_csv_file(graph_file)
+    pdf = utils.read_csv_for_nx(graph_file)
+
+    G1 = cugraph.from_edgelist(df, source="0", destination="1")
+    G2 = cugraph.from_edgelist(pdf, source="0", destination="1")
+
+    assert G1.EdgeList == G2.EdgeList
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS)
+def test_from_adjlist(graph_file):
+    """
+    Compare the resulting Graph objs from cugraph.from_adjlist() calls of both
+    a cudf and pandas Series and ensure the results are equal.
+    """
+    G = utils.generate_cugraph_graph_from_file(graph_file, directed=True)
+    (cu_offsets, cu_indices, cu_vals) = G.view_adj_list()
+
+    pd_offsets = cu_offsets.to_pandas()
+    pd_indices = cu_indices.to_pandas()
+    if cu_vals is not None:
+        pd_vals = cu_vals.to_pandas()
+    else:
+        pd_vals = None
+
+    # FIXME: should mixing types be allowed?
+    with pytest.raises(TypeError):
+        G1 = cugraph.from_adjlist(cu_offsets, pd_indices)
+    with pytest.raises(TypeError):
+        G1 = cugraph.from_adjlist(cu_offsets, cu_indices, cu_vals,
+                                  create_using=33)
+
+    G1 = cugraph.from_adjlist(cu_offsets, cu_indices, cu_vals,
+                              create_using=cugraph.DiGraph)
+    G2 = cugraph.from_adjlist(pd_offsets, pd_indices, pd_vals,
+                              create_using=cugraph.DiGraph)
+
+    assert G1.AdjList == G2.AdjList
-Original file line number
+Diff line change
@@ Expand Up / @@ -539,7 +539,6 @@ def to_numpy_matrix(self): @@
             """
             Returns the graph adjacency matrix as a NumPy matrix.
             """
             np_array = self.to_numpy_array()
             return np.asmatrix(np_array)
@@ Expand Down @@