rapidsai · BradReesWork · Nov 24, 2020 · Nov 18, 2020 · Nov 19, 2020 · Nov 19, 2020
@@ -29,6 +29,7 @@
 from cugraph.structure import (
     Graph,
     DiGraph,
+    from_edgelist,
     from_cudf_edgelist,
     from_pandas_edgelist,
     to_pandas_edgelist,
@@ -70,7 +71,7 @@
 
 from cugraph.traversal import (
     bfs,
-    bfs_edges, 
+    bfs_edges,
     sssp,
     shortest_path,
     filter_unreachable,

@@ -14,7 +14,8 @@
 from cugraph.structure.graph import Graph, DiGraph
 from cugraph.structure.number_map import NumberMap
 from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf
-from cugraph.structure.convert_matrix import (from_cudf_edgelist,
+from cugraph.structure.convert_matrix import (from_edgelist,
+                                              from_cudf_edgelist,
                                               from_pandas_edgelist,
                                               to_pandas_edgelist,
                                               from_pandas_adjacency,

@@ -15,8 +15,62 @@
 # issue #146 is addressed, this file's extension should be changed from .pyx to
 # .py and should be located outside the python/cugraph/bindings directory.
 
+import cudf
+
 from cugraph.structure.graph import DiGraph, Graph
 
+# optional dependencies used for handling different input types
+try:
+    import pandas as pd
+except ModuleNotFoundError:
+    pd = None
+
+
+def from_edgelist(df, source='source', destination='destination',
+                  edge_attr=None, create_using=Graph, renumber=True):
+    """
+    Return a new graph created from the edge list representation.
+
+    This is a thin dispatcher: the work is delegated to from_cudf_edgelist
+    or from_pandas_edgelist depending on the type of ``df``.
+
+    Parameters
+    ----------
+    df : cudf.DataFrame, pandas.DataFrame
+        This DataFrame contains columns storing edge source vertices,
+        destination (or target following NetworkX's terminology) vertices, and
+        (optional) weights.
+    source : string or integer
+        This is used to index the source column.
+    destination : string or integer
+        This is used to index the destination (or target following NetworkX's
+        terminology) column.
+    edge_attr : string or integer, optional
+        This can be ``None``. If not, this is used to index the weight
+        column.
+    create_using : cuGraph.Graph
+        Specify the type of Graph to create.  Default is cugraph.Graph
+    renumber : bool
+        If source and destination indices are not in range 0 to V where V
+        is number of vertices, renumber argument should be True.
+
+    Returns
+    -------
+    Whatever from_cudf_edgelist or from_pandas_edgelist returns for the
+    given arguments.
+
+    Raises
+    ------
+    TypeError
+        If ``df`` is neither a cudf.DataFrame nor (when pandas is installed)
+        a pandas.DataFrame.
+
+    Examples
+    --------
+    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
+    >>>                   dtype=['int32', 'int32', 'float32'], header=None)
+    >>> G = cugraph.from_edgelist(M, source='0', destination='1',
+    >>>                           edge_attr='2')
+    """
+    # isinstance (rather than an exact type comparison) so that subclasses of
+    # the supported DataFrame types are dispatched as well.
+    if isinstance(df, cudf.DataFrame):
+        return from_cudf_edgelist(df, source, destination,
+                                  edge_attr, create_using, renumber)
+
+    # pandas is an optional dependency; pd is None when it is not installed.
+    if (pd is not None) and isinstance(df, pd.DataFrame):
+        return from_pandas_edgelist(df, source, destination,
+                                    edge_attr, create_using, renumber)
+
+    raise TypeError(f"obj of type {type(df)} is not supported.")
+
 
 def from_cudf_edgelist(df, source='source', destination='destination',
                        edge_attr=None, create_using=Graph, renumber=True):
@@ -52,7 +106,6 @@ def from_cudf_edgelist(df, source='source', destination='destination',
     >>>                   dtype=['int32', 'int32', 'float32'], header=None)
     >>> G = cugraph.Graph()
     >>> G = cugraph.from_cudf_edgelist(M, source='0', target='1', weight='2')
-
     """
     if create_using is Graph:
         G = Graph()

@@ -42,6 +42,13 @@
     import networkx as nx
 
 
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+def setup_function():
+    """Run a garbage collection pass before each test function."""
+    gc.collect()
+
+
 def compare_series(series_1, series_2):
     assert len(series_1) == len(series_2)
     df = cudf.DataFrame({"series_1": series_1, "series_2": series_2})
@@ -151,15 +158,12 @@ def check_all_two_hops(df, M):
 
 
 def test_version():
-    gc.collect()
+    """Smoke test: reading ``cugraph.__version__`` must not raise."""
     cugraph.__version__
 
 
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_add_edge_list_to_adj_list(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     M = utils.read_csv_for_nx(graph_file)
@@ -180,8 +184,6 @@ def test_add_edge_list_to_adj_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_add_adj_list_to_edge_list(graph_file):
-    gc.collect()
-
     Mnx = utils.read_csv_for_nx(graph_file)
     N = max(max(Mnx["0"]), max(Mnx["1"])) + 1
     Mcsr = scipy.sparse.csr_matrix(
@@ -208,8 +210,6 @@ def test_add_adj_list_to_edge_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_view_edge_list_from_adj_list(graph_file):
-    gc.collect()
-
     Mnx = utils.read_csv_for_nx(graph_file)
     N = max(max(Mnx["0"]), max(Mnx["1"])) + 1
     Mcsr = scipy.sparse.csr_matrix(
@@ -231,8 +231,6 @@ def test_view_edge_list_from_adj_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_delete_edge_list_delete_adj_list(graph_file):
-    gc.collect()
-
     Mnx = utils.read_csv_for_nx(graph_file)
     df = cudf.DataFrame()
     df["src"] = cudf.Series(Mnx["0"])
@@ -261,8 +259,6 @@ def test_delete_edge_list_delete_adj_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
-    gc.collect()
-
     Mnx = utils.read_csv_for_nx(graph_file)
     df = cudf.DataFrame()
     df["src"] = cudf.Series(Mnx["0"])
@@ -302,8 +298,6 @@ def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_edges_for_Graph(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     # Create nx Graph
@@ -342,8 +336,6 @@ def test_edges_for_Graph(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_view_edge_list_for_Graph(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     # Create nx Graph
@@ -387,8 +379,6 @@ def test_view_edge_list_for_Graph(graph_file):
 # Test
 @pytest.mark.parametrize('graph_file', utils.DATASETS)
 def test_consolidation(graph_file):
-    gc.collect()
-
     cluster = LocalCUDACluster()
     client = Client(cluster)
     chunksize = dcg.get_chunksize(graph_file)
@@ -423,8 +413,6 @@ def test_consolidation(graph_file):
 # Test
 @pytest.mark.parametrize('graph_file', utils.DATASETS_SMALL)
 def test_two_hop_neighbors(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     G = cugraph.DiGraph()
@@ -444,8 +432,6 @@ def test_two_hop_neighbors(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_degree_functionality(graph_file):
-    gc.collect()
-
     M = utils.read_csv_for_nx(graph_file)
     cu_M = utils.read_csv_file(graph_file)
 
@@ -484,8 +470,6 @@ def test_degree_functionality(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_degrees_functionality(graph_file):
-    gc.collect()
-
     M = utils.read_csv_for_nx(graph_file)
     cu_M = utils.read_csv_file(graph_file)
 
@@ -517,8 +501,6 @@ def test_degrees_functionality(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_number_of_vertices(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
 
     M = utils.read_csv_for_nx(graph_file)
@@ -537,8 +519,6 @@ def test_number_of_vertices(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
 def test_to_directed(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
     M = utils.read_csv_for_nx(graph_file)
@@ -566,8 +546,6 @@ def test_to_directed(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
 def test_to_undirected(graph_file):
-    gc.collect()
-
     # Read data and then convert to directed by dropped some edges
     cu_M = utils.read_csv_file(graph_file)
     cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
@@ -602,8 +580,6 @@ def test_to_undirected(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_has_edge(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
 
@@ -619,8 +595,6 @@ def test_has_edge(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_has_node(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     nodes = cudf.concat([cu_M["0"], cu_M["1"]]).unique()
 
@@ -632,13 +606,10 @@ def test_has_node(graph_file):
         assert G.has_node(n)
 
 
-# Test all combinations of default/managed and pooled/non-pooled allocation
 @pytest.mark.parametrize('graph_file', utils.DATASETS)
 def test_bipartite_api(graph_file):
     # This test only tests the functionality of adding set of nodes and
     # retrieving them. The datasets currently used are not truly bipartite.
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     nodes = cudf.concat([cu_M['0'], cu_M['1']]).unique()
 
@@ -670,8 +641,6 @@ def test_bipartite_api(graph_file):
 # Test
 @pytest.mark.parametrize("graph_file", utils.DATASETS)
 def test_neighbors(graph_file):
-    gc.collect()
-
     cu_M = utils.read_csv_file(graph_file)
     nodes = cudf.concat([cu_M["0"], cu_M["1"]]).unique()
     M = utils.read_csv_for_nx(graph_file)
@@ -687,3 +656,18 @@ def test_neighbors(graph_file):
         cu_neighbors.sort()
         nx_neighbors.sort()
         assert cu_neighbors == nx_neighbors
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS)
+def test_from_edgelist(graph_file):
+    """
+    Load the same dataset as a cudf DataFrame and as a pandas DataFrame, build
+    a Graph from each through cugraph.from_edgelist(), and check that the two
+    resulting EdgeList attributes compare equal.
+    """
+    # Both calls name the same source/destination columns.
+    edge_columns = dict(source="0", destination="1")
+
+    cudf_edges = utils.read_csv_file(graph_file)
+    pandas_edges = utils.read_csv_for_nx(graph_file)
+
+    graph_from_cudf = cugraph.from_edgelist(cudf_edges, **edge_columns)
+    graph_from_pandas = cugraph.from_edgelist(pandas_edges, **edge_columns)
+
+    assert graph_from_cudf.EdgeList == graph_from_pandas.EdgeList