Merge 0a6d3b2 into 1c7142e

rapidsai · May 3, 2021 · f3c1029 · f3c1029
2 parents 1c7142e + 0a6d3b2
commit f3c1029
Show file tree

Hide file tree

Showing 15 changed files with 284 additions and 51 deletions.
diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py
@@ -106,7 +106,11 @@ def katz_centrality(
 
     if nstart is not None:
         if G.renumbered is True:
-            nstart = G.add_internal_vertex_id(nstart, 'vertex', 'vertex')
+            if len(G.renumber_map.implementation.col_names) > 1:
+                cols = nstart.columns[:-1].to_list()
+            else:
+                cols = 'vertex'
+            nstart = G.add_internal_vertex_id(nstart, 'vertex', cols)
 
     df = katz_centrality_wrapper.katz_centrality(
         G, alpha, max_iter, tol, nstart, normalized

diff --git a/python/cugraph/centrality/katz_centrality_wrapper.pyx b/python/cugraph/centrality/katz_centrality_wrapper.pyx
@@ -34,7 +34,7 @@ def get_output_df(input_graph, nstart):
         if len(nstart) != num_verts:
             raise ValueError('nstart must have initial guess for all vertices')
 
-        nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64])
+        nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64])[0]
         df['katz_centrality'][nstart['vertex']] = nstart['values']
 
     return df

diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py
@@ -58,8 +58,10 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None):
         Graph or matrix object, which should contain the connectivity
         information. Edge weights, if present, should be single or double
         precision floating point values.
-    n : integer
-        A single node
+    n : integer or cudf.DataFrame
+        A single node as integer or a cudf.DataFrame if nodes are
+        represented with multiple columns. If a cudf.DataFrame is provided,
+        only the first row is taken as the node input.
     radius: integer, optional
         Include all neighbors of distance<=radius from n.
     center: bool, optional
@@ -91,20 +93,25 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None):
     result_graph = type(G)()
 
     if G.renumbered is True:
-        n = G.lookup_internal_vertex_id(cudf.Series([n]))
+        if isinstance(n, cudf.DataFrame):
+            n = G.lookup_internal_vertex_id(n, n.columns)
+        else:
+            n = G.lookup_internal_vertex_id(cudf.Series([n]))
 
     df, offsets = egonet_wrapper.egonet(G, n, radius)
 
     if G.renumbered:
-        df = G.unrenumber(df, "src")
-        df = G.unrenumber(df, "dst")
+        df, src_names = G.unrenumber(df, "src", get_column_names=True)
+        df, dst_names = G.unrenumber(df, "dst", get_column_names=True)
 
     if G.edgelist.weights:
         result_graph.from_cudf_edgelist(
-            df, source="src", destination="dst", edge_attr="weight"
+            df, source=src_names, destination=dst_names,
+            edge_attr="weight"
         )
     else:
-        result_graph.from_cudf_edgelist(df, source="src", destination="dst")
+        result_graph.from_cudf_edgelist(df, source=src_names,
+                                        destination=dst_names)
     return _convert_graph_to_output_type(result_graph, input_type)
 
 
@@ -121,8 +128,8 @@ def batched_ego_graphs(
         Graph or matrix object, which should contain the connectivity
         information. Edge weights, if present, should be single or double
         precision floating point values.
-    seeds : cudf.Series or list
-        Specifies the seeds of the induced egonet subgraphs
+    seeds : cudf.Series or list or cudf.DataFrame
+        Specifies the seeds of the induced egonet subgraphs.
     radius: integer, optional
         Include all neighbors of distance<=radius from n.
     center: bool, optional
@@ -145,7 +152,10 @@ def batched_ego_graphs(
     (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight")
 
     if G.renumbered is True:
-        seeds = G.lookup_internal_vertex_id(cudf.Series(seeds))
+        if isinstance(seeds, cudf.DataFrame):
+            seeds = G.lookup_internal_vertex_id(seeds, seeds.columns)
+        else:
+            seeds = G.lookup_internal_vertex_id(cudf.Series(seeds))
 
     df, offsets = egonet_wrapper.egonet(G, seeds, radius)
 

diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py
@@ -12,8 +12,8 @@
 # limitations under the License.
 
 from cugraph.community import subgraph_extraction_wrapper
-from cugraph.structure.graph_classes import null_check
 from cugraph.utilities import check_nx_graph
+import cudf
 from cugraph.utilities import cugraph_to_nx
 
 
@@ -28,8 +28,9 @@ def subgraph(G, vertices):
     ----------
     G : cugraph.Graph
         cuGraph graph descriptor
-    vertices : cudf.Series
-        Specifies the vertices of the induced subgraph
+    vertices : cudf.Series or cudf.DataFrame
+        Specifies the vertices of the induced subgraph. For multi-column
+        vertices, vertices should be provided as a cudf.DataFrame
 
     Returns
     -------
@@ -52,27 +53,30 @@ def subgraph(G, vertices):
     >>> Sg = cugraph.subgraph(G, sverts)
     """
 
-    null_check(vertices)
-
     G, isNx = check_nx_graph(G)
 
     if G.renumbered:
-        vertices = G.lookup_internal_vertex_id(vertices)
+        if isinstance(vertices, cudf.DataFrame):
+            vertices = G.lookup_internal_vertex_id(vertices, vertices.columns)
+        else:
+            vertices = G.lookup_internal_vertex_id(vertices)
 
     result_graph = type(G)()
 
     df = subgraph_extraction_wrapper.subgraph(G, vertices)
 
     if G.renumbered:
-        df = G.unrenumber(df, "src")
-        df = G.unrenumber(df, "dst")
+        df, src_names = G.unrenumber(df, "src", get_column_names=True)
+        df, dst_names = G.unrenumber(df, "dst", get_column_names=True)
 
     if G.edgelist.weights:
         result_graph.from_cudf_edgelist(
-            df, source="src", destination="dst", edge_attr="weight"
+            df, source=src_names, destination=dst_names,
+            edge_attr="weight"
         )
     else:
-        result_graph.from_cudf_edgelist(df, source="src", destination="dst")
+        result_graph.from_cudf_edgelist(df, source=src_names,
+                                        destination=dst_names)
 
     if isNx is True:
         result_graph = cugraph_to_nx(result_graph)

diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py
@@ -69,31 +69,38 @@ def k_core(G, k=None, core_number=None):
 
     if core_number is not None:
         if G.renumbered is True:
-            core_number = G.add_internal_vertex_id(
-                core_number, "vertex", "vertex", drop=True
-            )
+            if len(G.renumber_map.implementation.col_names) > 1:
+                cols = core_number.columns[:-1].to_list()
+            else:
+                cols = 'vertex'
+            core_number = G.add_internal_vertex_id(core_number, 'vertex',
+                                                   cols)
+
     else:
         core_number = core_number_wrapper.core_number(G)
         core_number = core_number.rename(
             columns={"core_number": "values"}, copy=False
         )
-    print(core_number)
+
     if k is None:
         k = core_number["values"].max()
 
     k_core_df = k_core_wrapper.k_core(G, k, core_number)
 
     if G.renumbered:
-        k_core_df = G.unrenumber(k_core_df, "src")
-        k_core_df = G.unrenumber(k_core_df, "dst")
+        k_core_df, src_names = G.unrenumber(k_core_df, "src",
+                                            get_column_names=True)
+        k_core_df, dst_names = G.unrenumber(k_core_df, "dst",
+                                            get_column_names=True)
 
     if G.edgelist.weights:
         KCoreGraph.from_cudf_edgelist(
-            k_core_df, source="src", destination="dst", edge_attr="weight"
+            k_core_df, source=src_names, destination=dst_names,
+            edge_attr="weight"
         )
     else:
         KCoreGraph.from_cudf_edgelist(
-            k_core_df, source="src", destination="dst"
+            k_core_df, source=src_names, destination=dst_names,
         )
 
     if isNx is True:

diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py
@@ -11,6 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import cudf
 from cugraph.linear_assignment import lap_wrapper
 
 
@@ -39,9 +40,10 @@ def hungarian(G, workers):
         as an edge list.  Edge weights are required. If an edge list is
         not provided then it will be computed.
 
-    workers : cudf.Series
+    workers : cudf.Series or cudf.DataFrame
         A series or column that identifies the vertex ids of the vertices
-        in the workers set.  All vertices in G that are not in the workers
+        in the workers set.  In case of multi-column vertices, it should be a
+        cudf.DataFrame. All vertices in G that are not in the workers
         set are implicitly assigned to the jobs set.
 
     Returns
@@ -67,16 +69,20 @@ def hungarian(G, workers):
     """
 
     if G.renumbered:
-        local_workers = G.lookup_internal_vertex_id(workers)
+        if isinstance(workers, cudf.DataFrame):
+            local_workers = G.lookup_internal_vertex_id(workers,
+                                                        workers.columns)
+        else:
+            local_workers = G.lookup_internal_vertex_id(workers)
     else:
         local_workers = workers
 
-    df = lap_wrapper.sparse_hungarian(G, local_workers)
+    cost, df = lap_wrapper.sparse_hungarian(G, local_workers)
 
     if G.renumbered:
         df = G.unrenumber(df, 'vertex')
 
-    return df
+    return cost, df
 
 
 def dense_hungarian(costs, num_rows, num_columns):

diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py
@@ -35,9 +35,10 @@ def random_walks(
         Use weight parameter if weights need to be considered
         (currently not supported)
 
-    start_vertices : int or list or cudf.Series
+    start_vertices : int or list or cudf.Series or cudf.DataFrame
         A single node or a list or a cudf.Series of nodes from which to run
-        the random walks
+        the random walks. In case of multi-column vertices it should be
+        a cudf.DataFrame
 
     max_depth : int
         The maximum depth of the random walks
@@ -61,11 +62,17 @@ def random_walks(
     if start_vertices is int:
         start_vertices = [start_vertices]
 
-    if not isinstance(start_vertices, cudf.Series):
+    if isinstance(start_vertices, list):
         start_vertices = cudf.Series(start_vertices)
 
     if G.renumbered is True:
-        start_vertices = G.lookup_internal_vertex_id(start_vertices)
+        if isinstance(start_vertices, cudf.DataFrame):
+            start_vertices = G.lookup_internal_vertex_id(
+                start_vertices,
+                start_vertices.columns)
+        else:
+            start_vertices = G.lookup_internal_vertex_id(start_vertices)
+
     vertex_set, edge_set, sizes = random_walks_wrapper.random_walks(
         G, start_vertices, max_depth)
 

diff --git a/python/cugraph/structure/graph_classes.py b/python/cugraph/structure/graph_classes.py
@@ -293,7 +293,8 @@ def from_numpy_matrix(self, np_matrix):
         np_array = np.asarray(np_matrix)
         self.from_numpy_array(np_array)
 
-    def unrenumber(self, df, column_name, preserve_order=False):
+    def unrenumber(self, df, column_name, preserve_order=False,
+                   get_column_names=False):
         """
         Given a DataFrame containing internal vertex ids in the identified
         column, replace this with external vertex ids.  If the renumbering
@@ -322,7 +323,8 @@ def unrenumber(self, df, column_name, preserve_order=False):
             vertex identifiers are added to the DataFrame, the internal
             vertex identifier column is removed from the dataframe.
         """
-        return self.renumber_map.unrenumber(df, column_name, preserve_order)
+        return self.renumber_map.unrenumber(df, column_name, preserve_order,
+                                            get_column_names)
 
     def lookup_internal_vertex_id(self, df, column_name=None):
         """

diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py
@@ -591,7 +591,8 @@ def get_renumbered_df(data):
             renumber_map.implementation.numbered = True
             return renumbered_df, renumber_map
 
-    def unrenumber(self, df, column_name, preserve_order=False):
+    def unrenumber(self, df, column_name, preserve_order=False,
+                   get_column_names=False):
         """
         Given a DataFrame containing internal vertex ids in the identified
         column, replace this with external vertex ids.  If the renumbering
@@ -611,12 +612,17 @@ def unrenumber(self, df, column_name, preserve_order=False):
         preserve_order: (optional) bool
             If True, preserve the order of the rows in the output
             DataFrame to match the input DataFrame
+        get_column_names: (optional) bool
+            If True, the unrenumbered column names are returned.
         Returns
         ---------
         df : cudf.DataFrame or dask_cudf.DataFrame
             The original DataFrame columns exist unmodified.  The external
             vertex identifiers are added to the DataFrame, the internal
             vertex identifier column is removed from the dataframe.
+        column_names: string or list of strings
+            If get_column_names is True, the unrenumbered column names are
+            returned.
         Examples
         --------
         >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
@@ -636,11 +642,13 @@ def unrenumber(self, df, column_name, preserve_order=False):
         if len(self.implementation.col_names) == 1:
             # Output will be renamed to match input
             mapping = {"0": column_name}
+            col_names = column_name
         else:
             # Output will be renamed to ${i}_${column_name}
             mapping = {}
             for nm in self.implementation.col_names:
                 mapping[nm] = nm + "_" + column_name
+            col_names = list(mapping.values())
 
         if preserve_order:
             index_name = NumberMap.generate_unused_column_name(df)
@@ -654,8 +662,12 @@ def unrenumber(self, df, column_name, preserve_order=False):
             ).drop(columns=index_name).reset_index(drop=True)
 
         if type(df) is dask_cudf.DataFrame:
-            return df.map_partitions(
+            df = df.map_partitions(
                 lambda df: df.rename(columns=mapping, copy=False)
             )
         else:
-            return df.rename(columns=mapping, copy=False)
+            df = df.rename(columns=mapping, copy=False)
+        if get_column_names:
+            return df, col_names
+        else:
+            return df