From 5adf76f2a965d9b9350dc625e79a020f5b253aa5 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Wed, 18 Nov 2020 11:23:37 -0600
Subject: [PATCH 1/3] Initial version and test for cugraph.from_edgelist().

---
 python/cugraph/__init__.py                 |  3 +-
 python/cugraph/structure/__init__.py       |  3 +-
 python/cugraph/structure/convert_matrix.py | 55 +++++++++++++++++++-
 python/cugraph/tests/test_graph.py         | 60 ++++++++--------------
 4 files changed, 80 insertions(+), 41 deletions(-)

diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py
index feab1cc3eb9..5848489e6fa 100644
--- a/python/cugraph/__init__.py
+++ b/python/cugraph/__init__.py
@@ -29,6 +29,7 @@
 from cugraph.structure import (
     Graph,
     DiGraph,
+    from_edgelist,
     from_cudf_edgelist,
     from_pandas_edgelist,
     to_pandas_edgelist,
@@ -70,7 +71,7 @@
 
 from cugraph.traversal import (
     bfs,
-    bfs_edges, 
+    bfs_edges,
     sssp,
     shortest_path,
     filter_unreachable,
diff --git a/python/cugraph/structure/__init__.py b/python/cugraph/structure/__init__.py
index b8b6fbe0435..f148c6dd9d7 100644
--- a/python/cugraph/structure/__init__.py
+++ b/python/cugraph/structure/__init__.py
@@ -14,7 +14,8 @@
 from cugraph.structure.graph import Graph, DiGraph
 from cugraph.structure.number_map import NumberMap
 from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf
-from cugraph.structure.convert_matrix import (from_cudf_edgelist,
+from cugraph.structure.convert_matrix import (from_edgelist,
+                                              from_cudf_edgelist,
                                               from_pandas_edgelist,
                                               to_pandas_edgelist,
                                               from_pandas_adjacency,
diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py
index 8acdc7e1799..750af97ea01 100644
--- a/python/cugraph/structure/convert_matrix.py
+++ b/python/cugraph/structure/convert_matrix.py
@@ -15,8 +15,62 @@
 # issue #146 is addressed, this file's extension should be changed from .pyx to
 # .py and should be located outside the python/cugraph/bindings directory.
 
+import cudf
+
 from cugraph.structure.graph import DiGraph, Graph
 
+# optional dependencies used for handling different input types
+try:
+    import pandas as pd
+except ModuleNotFoundError:
+    pd = None
+
+
+def from_edgelist(df, source='source', destination='destination',
+                  edge_attr=None, create_using=Graph, renumber=True):
+    """
+    Return a new graph created from the edge list representation.
+
+    Parameters
+    ----------
+    df : cudf.DataFrame, pandas.DataFrame
+        This DataFrame contains columns storing edge source vertices,
+        destination (or target following NetworkX's terminology) vertices, and
+        (optional) weights.
+    source : string or integer
+        This is used to index the source column.
+    destination : string or integer
+        This is used to index the destination (or target following NetworkX's
+        terminology) column.
+    edge_attr : string or integer, optional
+        This can be ``None``. If not, it is used to index the weight
+        column.
+    create_using : cugraph.Graph
+        Specify the type of Graph to create.  Default is cugraph.Graph
+    renumber : bool
+        If source and destination indices are not in range 0 to V where V
+        is the number of vertices, the renumber argument should be True.
+
+    Examples
+    --------
+    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
+    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
+    >>> G = cugraph.Graph()
+    >>> G = cugraph.from_edgelist(M, source='0', target='1', weight='2')
+    """
+    df_type = type(df)
+
+    if df_type is cudf.DataFrame:
+        return from_cudf_edgelist(df, source, destination,
+                                  edge_attr, create_using, renumber)
+
+    elif (pd is not None) and (df_type is pd.DataFrame):
+        return from_pandas_edgelist(df, source, destination,
+                                    edge_attr, create_using, renumber)
+
+    else:
+        raise TypeError(f"obj of type {df_type} is not supported.")
+
 
 def from_cudf_edgelist(df, source='source', destination='destination',
                        edge_attr=None, create_using=Graph, renumber=True):
@@ -52,7 +106,6 @@ def from_cudf_edgelist(df, source='source', destination='destination',
     >>>                   dtype=['int32', 'int32', 'float32'], header=None)
     >>> G = cugraph.Graph()
     >>> G = cugraph.from_cudf_edgelist(M, source='0', target='1', weight='2')
-
     """
     if create_using is Graph:
         G = Graph()
diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py
index 59d0d5c4e09..2a6b2a37f2b 100644
--- a/python/cugraph/tests/test_graph.py
+++ b/python/cugraph/tests/test_graph.py
@@ -42,6 +42,13 @@
     import networkx as nx
 
 
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
 def compare_series(series_1, series_2):
     assert len(series_1) == len(series_2)
     df = cudf.DataFrame({"series_1": series_1, "series_2": series_2})
@@ -151,15 +158,12 @@ def check_all_two_hops(df, M):
 
 
 def test_version():
-    gc.collect()
     cugraph.__version__
 
 
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_add_edge_list_to_adj_list(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     M = utils.read_csv_for_nx(graph_file)
@@ -180,8 +184,6 @@ def test_add_edge_list_to_adj_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_add_adj_list_to_edge_list(graph_file):
-    gc.collect()
-
     Mnx = utils.read_csv_for_nx(graph_file)
     N = max(max(Mnx["0"]), max(Mnx["1"])) + 1
     Mcsr = scipy.sparse.csr_matrix(
@@ -208,8 +210,6 @@ def test_add_adj_list_to_edge_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_view_edge_list_from_adj_list(graph_file):
-    gc.collect()
-
     Mnx = utils.read_csv_for_nx(graph_file)
     N = max(max(Mnx["0"]), max(Mnx["1"])) + 1
     Mcsr = scipy.sparse.csr_matrix(
@@ -231,8 +231,6 @@ def test_view_edge_list_from_adj_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_delete_edge_list_delete_adj_list(graph_file):
-    gc.collect()
-
     Mnx = utils.read_csv_for_nx(graph_file)
     df = cudf.DataFrame()
     df["src"] = cudf.Series(Mnx["0"])
@@ -261,8 +259,6 @@ def test_delete_edge_list_delete_adj_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
-    gc.collect()
-
     Mnx = utils.read_csv_for_nx(graph_file)
     df = cudf.DataFrame()
     df["src"] = cudf.Series(Mnx["0"])
@@ -302,8 +298,6 @@ def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_edges_for_Graph(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     # Create nx Graph
@@ -342,8 +336,6 @@ def test_edges_for_Graph(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_view_edge_list_for_Graph(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     # Create nx Graph
@@ -387,8 +379,6 @@ def test_view_edge_list_for_Graph(graph_file):
 # Test
 @pytest.mark.parametrize('graph_file', utils.DATASETS)
 def test_consolidation(graph_file):
-    gc.collect()
-
     cluster = LocalCUDACluster()
     client = Client(cluster)
     chunksize = dcg.get_chunksize(graph_file)
@@ -423,8 +413,6 @@ def test_consolidation(graph_file):
 # Test
 @pytest.mark.parametrize('graph_file', utils.DATASETS_SMALL)
 def test_two_hop_neighbors(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     G = cugraph.DiGraph()
@@ -444,8 +432,6 @@ def test_two_hop_neighbors(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_degree_functionality(graph_file):
-    gc.collect()
-
     M = utils.read_csv_for_nx(graph_file)
     cu_M = utils.read_csv_file(graph_file)
 
@@ -484,8 +470,6 @@ def test_degree_functionality(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_degrees_functionality(graph_file):
-    gc.collect()
-
     M = utils.read_csv_for_nx(graph_file)
     cu_M = utils.read_csv_file(graph_file)
 
@@ -517,8 +501,6 @@ def test_degrees_functionality(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_number_of_vertices(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     M = utils.read_csv_for_nx(graph_file)
@@ -537,8 +519,6 @@ def test_number_of_vertices(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
 def test_to_directed(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
     M = utils.read_csv_for_nx(graph_file)
@@ -566,8 +546,6 @@ def test_to_directed(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
 def test_to_undirected(graph_file):
-    gc.collect()
-
     # Read data and then convert to directed by dropped some edges
     cu_M = utils.read_csv_file(graph_file)
     cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
@@ -602,8 +580,6 @@ def test_to_undirected(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_has_edge(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
 
@@ -619,8 +595,6 @@ def test_has_edge(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_has_node(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     nodes = cudf.concat([cu_M["0"], cu_M["1"]]).unique()
 
@@ -632,13 +606,10 @@ def test_has_node(graph_file):
         assert G.has_node(n)
 
 
-# Test all combinations of default/managed and pooled/non-pooled allocation
 @pytest.mark.parametrize('graph_file', utils.DATASETS)
 def test_bipartite_api(graph_file):
     # This test only tests the functionality of adding set of nodes and
     # retrieving them. The datasets currently used are not truly bipartite.
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     nodes = cudf.concat([cu_M['0'], cu_M['1']]).unique()
 
@@ -670,8 +641,6 @@ def test_bipartite_api(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_neighbors(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     nodes = cudf.concat([cu_M["0"], cu_M["1"]]).unique()
     M = utils.read_csv_for_nx(graph_file)
@@ -687,3 +656,18 @@ def test_neighbors(graph_file):
         cu_neighbors.sort()
         nx_neighbors.sort()
         assert cu_neighbors == nx_neighbors
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS)
+def test_from_edgelist(graph_file):
+    """
+    Compare the resulting Graph objs from cugraph.from_edgelist() calls of both
+    a cudf and pandas DataFrame and ensure the results are equal.
+    """
+    cu_M = utils.read_csv_file(graph_file)
+    M = utils.read_csv_for_nx(graph_file)
+
+    G1 = cugraph.from_edgelist(cu_M, source="0", destination="1")
+    G2 = cugraph.from_edgelist(M, source="0", destination="1")
+
+    assert G1.EdgeList == G2.EdgeList

From 11bd691110c0c97780d8ab474db43894b1e48908 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Thu, 19 Nov 2020 15:36:15 -0600
Subject: [PATCH 2/3] Fixed example in docstring, added PR 1274 to CHANGELOG.md

---
 CHANGELOG.md                               | 1 +
 python/cugraph/structure/convert_matrix.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d1802221c14..017ff151ab1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## New Features
 - PR #1245 Add functions to add pandas and numpy compatibility
 - PR #1260 Add katz_centrality mnmg wrapper
+- PR #1274 Add generic from_edgelist() API
 
 ## Improvements
 - PR #1227 Pin cmake policies to cmake 3.17 version
diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py
index 750af97ea01..614315bbc52 100644
--- a/python/cugraph/structure/convert_matrix.py
+++ b/python/cugraph/structure/convert_matrix.py
@@ -56,7 +56,7 @@ def from_edgelist(df, source='source', destination='destination',
     >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
     >>>                   dtype=['int32', 'int32', 'float32'], header=None)
     >>> G = cugraph.Graph()
-    >>> G = cugraph.from_edgelist(M, source='0', target='1', weight='2')
+    >>> G = cugraph.from_edgelist(M, source='0', destination='1', edge_attr='2')
     """
     df_type = type(df)
 

From 5ea02102a8eed2b83f616600d1d69ed46c585b18 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <rratzel@nvidia.com>
Date: Mon, 23 Nov 2020 20:06:10 -0600
Subject: [PATCH 3/3] Added support for dask_cudf DataFrame to from_edgelist().

---
 python/cugraph/structure/convert_matrix.py   | 12 ++++++-
 python/cugraph/tests/dask/test_mg_utility.py | 35 ++++++++++++++++++--
 python/cugraph/tests/test_convert_matrix.py  | 11 +++---
 3 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py
index e8d89541653..edd1c630185 100644
--- a/python/cugraph/structure/convert_matrix.py
+++ b/python/cugraph/structure/convert_matrix.py
@@ -16,6 +16,7 @@
 # .py and should be located outside the python/cugraph/bindings directory.
 
 import cudf
+import dask_cudf
 
 from cugraph.structure.graph import DiGraph, Graph
 
@@ -33,7 +34,7 @@ def from_edgelist(df, source='source', destination='destination',
 
     Parameters
     ----------
-    df : cudf.DataFrame, pandas.DataFrame
+    df : cudf.DataFrame, pandas.DataFrame, dask_cudf.core.DataFrame
         This DataFrame contains columns storing edge source vertices,
         destination (or target following NetworkX's terminology) vertices, and
         (optional) weights.
@@ -69,6 +70,15 @@ def from_edgelist(df, source='source', destination='destination',
         return from_pandas_edgelist(df, source, destination,
                                     edge_attr, create_using, renumber)
 
+    elif df_type is dask_cudf.core.DataFrame:
+        if create_using in [Graph, DiGraph]:
+            G = create_using()
+        else:
+            raise TypeError(f"'create_using' is type {create_using}, must be "
+                            "either a cugraph.Graph or cugraph.DiGraph")
+        G.from_dask_cudf_edgelist(df, source, destination, edge_attr, renumber)
+        return G
+
     else:
         raise TypeError(f"obj of type {df_type} is not supported.")
 
diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py
index e802a65c37f..808f1bcfa70 100644
--- a/python/cugraph/tests/dask/test_mg_utility.py
+++ b/python/cugraph/tests/dask/test_mg_utility.py
@@ -28,6 +28,13 @@
 from cugraph.tests import utils
 
 
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    gc.collect()
+
+
 @pytest.fixture
 def client_connection():
     cluster = LocalCUDACluster()
@@ -44,9 +51,33 @@ def client_connection():
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-def test_compute_local_data(client_connection):
+def test_from_edgelist(client_connection):
+    input_data_path = r"../datasets/karate.csv"
+    chunksize = dcg.get_chunksize(input_data_path)
+    ddf = dask_cudf.read_csv(
+        input_data_path,
+        chunksize=chunksize,
+        delimiter=" ",
+        names=["src", "dst", "value"],
+        dtype=["int32", "int32", "float32"],
+    )
 
-    gc.collect()
+    dg1 = cugraph.from_edgelist(
+        ddf, source="src", destination="dst", edge_attr="value",
+        create_using=cugraph.DiGraph)
+
+    dg2 = cugraph.DiGraph()
+    dg2.from_dask_cudf_edgelist(
+        ddf, source="src", destination="dst", edge_attr="value"
+    )
+
+    assert dg1.EdgeList == dg2.EdgeList
+
+
+@pytest.mark.skipif(
+    is_single_gpu(), reason="skipping MG testing on Single GPU system"
+)
+def test_compute_local_data(client_connection):
 
     input_data_path = r"../datasets/karate.csv"
     chunksize = dcg.get_chunksize(input_data_path)
diff --git a/python/cugraph/tests/test_convert_matrix.py b/python/cugraph/tests/test_convert_matrix.py
index f2dd59071a8..d418dd7ce2e 100644
--- a/python/cugraph/tests/test_convert_matrix.py
+++ b/python/cugraph/tests/test_convert_matrix.py
@@ -29,10 +29,15 @@
     import networkx as nx
 
 
-@pytest.mark.parametrize("graph_file", utils.DATASETS)
-def test_to_from_pandas(graph_file):
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
     gc.collect()
 
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS)
+def test_to_from_pandas(graph_file):
     # Read in the graph
     M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
 
@@ -79,8 +84,6 @@ def test_to_from_pandas(graph_file):
 
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_from_to_numpy(graph_file):
-    gc.collect()
-
     # Read in the graph
     M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)