Skip to content

Commit

Permalink
Merge 0a6d3b2 into 1c7142e
Browse files Browse the repository at this point in the history
  • Loading branch information
Iroy30 authored May 3, 2021
2 parents 1c7142e + 0a6d3b2 commit f3c1029
Show file tree
Hide file tree
Showing 15 changed files with 284 additions and 51 deletions.
6 changes: 5 additions & 1 deletion python/cugraph/centrality/katz_centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,11 @@ def katz_centrality(

if nstart is not None:
if G.renumbered is True:
nstart = G.add_internal_vertex_id(nstart, 'vertex', 'vertex')
if len(G.renumber_map.implementation.col_names) > 1:
cols = nstart.columns[:-1].to_list()
else:
cols = 'vertex'
nstart = G.add_internal_vertex_id(nstart, 'vertex', cols)

df = katz_centrality_wrapper.katz_centrality(
G, alpha, max_iter, tol, nstart, normalized
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/centrality/katz_centrality_wrapper.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def get_output_df(input_graph, nstart):
if len(nstart) != num_verts:
raise ValueError('nstart must have initial guess for all vertices')

nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64])
nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64])[0]
df['katz_centrality'][nstart['vertex']] = nstart['values']

return df
Expand Down
30 changes: 20 additions & 10 deletions python/cugraph/community/egonet.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None):
Graph or matrix object, which should contain the connectivity
information. Edge weights, if present, should be single or double
precision floating point values.
n : integer
A single node
n : integer or cudf.DataFrame
A single node as integer or a cudf.DataFrame if nodes are
represented with multiple columns. If a cudf.DataFrame is provided,
only the first row is taken as the node input.
radius: integer, optional
Include all neighbors of distance<=radius from n.
center: bool, optional
Expand Down Expand Up @@ -91,20 +93,25 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None):
result_graph = type(G)()

if G.renumbered is True:
n = G.lookup_internal_vertex_id(cudf.Series([n]))
if isinstance(n, cudf.DataFrame):
n = G.lookup_internal_vertex_id(n, n.columns)
else:
n = G.lookup_internal_vertex_id(cudf.Series([n]))

df, offsets = egonet_wrapper.egonet(G, n, radius)

if G.renumbered:
df = G.unrenumber(df, "src")
df = G.unrenumber(df, "dst")
df, src_names = G.unrenumber(df, "src", get_column_names=True)
df, dst_names = G.unrenumber(df, "dst", get_column_names=True)

if G.edgelist.weights:
result_graph.from_cudf_edgelist(
df, source="src", destination="dst", edge_attr="weight"
df, source=src_names, destination=dst_names,
edge_attr="weight"
)
else:
result_graph.from_cudf_edgelist(df, source="src", destination="dst")
result_graph.from_cudf_edgelist(df, source=src_names,
destination=dst_names)
return _convert_graph_to_output_type(result_graph, input_type)


Expand All @@ -121,8 +128,8 @@ def batched_ego_graphs(
Graph or matrix object, which should contain the connectivity
information. Edge weights, if present, should be single or double
precision floating point values.
seeds : cudf.Series or list
Specifies the seeds of the induced egonet subgraphs
seeds : cudf.Series or list or cudf.DataFrame
Specifies the seeds of the induced egonet subgraphs.
radius: integer, optional
Include all neighbors of distance<=radius from n.
center: bool, optional
Expand All @@ -145,7 +152,10 @@ def batched_ego_graphs(
(G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight")

if G.renumbered is True:
seeds = G.lookup_internal_vertex_id(cudf.Series(seeds))
if isinstance(seeds, cudf.DataFrame):
seeds = G.lookup_internal_vertex_id(seeds, seeds.columns)
else:
seeds = G.lookup_internal_vertex_id(cudf.Series(seeds))

df, offsets = egonet_wrapper.egonet(G, seeds, radius)

Expand Down
24 changes: 14 additions & 10 deletions python/cugraph/community/subgraph_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# limitations under the License.

from cugraph.community import subgraph_extraction_wrapper
from cugraph.structure.graph_classes import null_check
from cugraph.utilities import check_nx_graph
import cudf
from cugraph.utilities import cugraph_to_nx


Expand All @@ -28,8 +28,9 @@ def subgraph(G, vertices):
----------
G : cugraph.Graph
cuGraph graph descriptor
vertices : cudf.Series
Specifies the vertices of the induced subgraph
vertices : cudf.Series or cudf.DataFrame
Specifies the vertices of the induced subgraph. For multi-column
vertices, vertices should be provided as a cudf.DataFrame
Returns
-------
Expand All @@ -52,27 +53,30 @@ def subgraph(G, vertices):
>>> Sg = cugraph.subgraph(G, sverts)
"""

null_check(vertices)

G, isNx = check_nx_graph(G)

if G.renumbered:
vertices = G.lookup_internal_vertex_id(vertices)
if isinstance(vertices, cudf.DataFrame):
vertices = G.lookup_internal_vertex_id(vertices, vertices.columns)
else:
vertices = G.lookup_internal_vertex_id(vertices)

result_graph = type(G)()

df = subgraph_extraction_wrapper.subgraph(G, vertices)

if G.renumbered:
df = G.unrenumber(df, "src")
df = G.unrenumber(df, "dst")
df, src_names = G.unrenumber(df, "src", get_column_names=True)
df, dst_names = G.unrenumber(df, "dst", get_column_names=True)

if G.edgelist.weights:
result_graph.from_cudf_edgelist(
df, source="src", destination="dst", edge_attr="weight"
df, source=src_names, destination=dst_names,
edge_attr="weight"
)
else:
result_graph.from_cudf_edgelist(df, source="src", destination="dst")
result_graph.from_cudf_edgelist(df, source=src_names,
destination=dst_names)

if isNx is True:
result_graph = cugraph_to_nx(result_graph)
Expand Down
23 changes: 15 additions & 8 deletions python/cugraph/cores/k_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,31 +69,38 @@ def k_core(G, k=None, core_number=None):

if core_number is not None:
if G.renumbered is True:
core_number = G.add_internal_vertex_id(
core_number, "vertex", "vertex", drop=True
)
if len(G.renumber_map.implementation.col_names) > 1:
cols = core_number.columns[:-1].to_list()
else:
cols = 'vertex'
core_number = G.add_internal_vertex_id(core_number, 'vertex',
cols)

else:
core_number = core_number_wrapper.core_number(G)
core_number = core_number.rename(
columns={"core_number": "values"}, copy=False
)
print(core_number)

if k is None:
k = core_number["values"].max()

k_core_df = k_core_wrapper.k_core(G, k, core_number)

if G.renumbered:
k_core_df = G.unrenumber(k_core_df, "src")
k_core_df = G.unrenumber(k_core_df, "dst")
k_core_df, src_names = G.unrenumber(k_core_df, "src",
get_column_names=True)
k_core_df, dst_names = G.unrenumber(k_core_df, "dst",
get_column_names=True)

if G.edgelist.weights:
KCoreGraph.from_cudf_edgelist(
k_core_df, source="src", destination="dst", edge_attr="weight"
k_core_df, source=src_names, destination=dst_names,
edge_attr="weight"
)
else:
KCoreGraph.from_cudf_edgelist(
k_core_df, source="src", destination="dst"
k_core_df, source=src_names, destination=dst_names,
)

if isNx is True:
Expand Down
16 changes: 11 additions & 5 deletions python/cugraph/linear_assignment/lap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import cudf
from cugraph.linear_assignment import lap_wrapper


Expand Down Expand Up @@ -39,9 +40,10 @@ def hungarian(G, workers):
as an edge list. Edge weights are required. If an edge list is
not provided then it will be computed.
workers : cudf.Series
workers : cudf.Series or cudf.DataFrame
A series or column that identifies the vertex ids of the vertices
in the workers set. All vertices in G that are not in the workers
in the workers set. In case of multi-column vertices, it should be a
cudf.DataFrame. All vertices in G that are not in the workers
set are implicitly assigned to the jobs set.
Returns
Expand All @@ -67,16 +69,20 @@ def hungarian(G, workers):
"""

if G.renumbered:
local_workers = G.lookup_internal_vertex_id(workers)
if isinstance(workers, cudf.DataFrame):
local_workers = G.lookup_internal_vertex_id(workers,
workers.columns)
else:
local_workers = G.lookup_internal_vertex_id(workers)
else:
local_workers = workers

df = lap_wrapper.sparse_hungarian(G, local_workers)
cost, df = lap_wrapper.sparse_hungarian(G, local_workers)

if G.renumbered:
df = G.unrenumber(df, 'vertex')

return df
return cost, df


def dense_hungarian(costs, num_rows, num_columns):
Expand Down
15 changes: 11 additions & 4 deletions python/cugraph/sampling/random_walks.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def random_walks(
Use weight parameter if weights need to be considered
(currently not supported)
start_vertices : int or list or cudf.Series
start_vertices : int or list or cudf.Series or cudf.DataFrame
A single node or a list or a cudf.Series of nodes from which to run
the random walks
the random walks. In case of multi-column vertices it should be
a cudf.DataFrame
max_depth : int
The maximum depth of the random walks
Expand All @@ -61,11 +62,17 @@ def random_walks(
if start_vertices is int:
start_vertices = [start_vertices]

if not isinstance(start_vertices, cudf.Series):
if isinstance(start_vertices, list):
start_vertices = cudf.Series(start_vertices)

if G.renumbered is True:
start_vertices = G.lookup_internal_vertex_id(start_vertices)
if isinstance(start_vertices, cudf.DataFrame):
start_vertices = G.lookup_internal_vertex_id(
start_vertices,
start_vertices.columns)
else:
start_vertices = G.lookup_internal_vertex_id(start_vertices)

vertex_set, edge_set, sizes = random_walks_wrapper.random_walks(
G, start_vertices, max_depth)

Expand Down
6 changes: 4 additions & 2 deletions python/cugraph/structure/graph_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ def from_numpy_matrix(self, np_matrix):
np_array = np.asarray(np_matrix)
self.from_numpy_array(np_array)

def unrenumber(self, df, column_name, preserve_order=False):
def unrenumber(self, df, column_name, preserve_order=False,
get_column_names=False):
"""
Given a DataFrame containing internal vertex ids in the identified
column, replace this with external vertex ids. If the renumbering
Expand Down Expand Up @@ -322,7 +323,8 @@ def unrenumber(self, df, column_name, preserve_order=False):
vertex identifiers are added to the DataFrame, the internal
vertex identifier column is removed from the dataframe.
"""
return self.renumber_map.unrenumber(df, column_name, preserve_order)
return self.renumber_map.unrenumber(df, column_name, preserve_order,
get_column_names)

def lookup_internal_vertex_id(self, df, column_name=None):
"""
Expand Down
18 changes: 15 additions & 3 deletions python/cugraph/structure/number_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,8 @@ def get_renumbered_df(data):
renumber_map.implementation.numbered = True
return renumbered_df, renumber_map

def unrenumber(self, df, column_name, preserve_order=False):
def unrenumber(self, df, column_name, preserve_order=False,
get_column_names=False):
"""
Given a DataFrame containing internal vertex ids in the identified
column, replace this with external vertex ids. If the renumbering
Expand All @@ -611,12 +612,17 @@ def unrenumber(self, df, column_name, preserve_order=False):
preserve_order: (optional) bool
If True, preserve the ourder of the rows in the output
DataFrame to match the input DataFrame
get_column_names: (optional) bool
If True, the unrenumbered column names are returned.
Returns
---------
df : cudf.DataFrame or dask_cudf.DataFrame
The original DataFrame columns exist unmodified. The external
vertex identifiers are added to the DataFrame, the internal
vertex identifier column is removed from the dataframe.
column_names: string or list of strings
If get_column_names is True, the unrenumbered column names are
returned.
Examples
--------
>>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
Expand All @@ -636,11 +642,13 @@ def unrenumber(self, df, column_name, preserve_order=False):
if len(self.implementation.col_names) == 1:
# Output will be renamed to match input
mapping = {"0": column_name}
col_names = column_name
else:
# Output will be renamed to ${i}_${column_name}
mapping = {}
for nm in self.implementation.col_names:
mapping[nm] = nm + "_" + column_name
col_names = list(mapping.values())

if preserve_order:
index_name = NumberMap.generate_unused_column_name(df)
Expand All @@ -654,8 +662,12 @@ def unrenumber(self, df, column_name, preserve_order=False):
).drop(columns=index_name).reset_index(drop=True)

if type(df) is dask_cudf.DataFrame:
return df.map_partitions(
df = df.map_partitions(
lambda df: df.rename(columns=mapping, copy=False)
)
else:
return df.rename(columns=mapping, copy=False)
df = df.rename(columns=mapping, copy=False)
if get_column_names:
return df, col_names
else:
return df
Loading

0 comments on commit f3c1029

Please sign in to comment.