Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] enable multigraph #1280

Merged
merged 10 commits into from
Jan 21, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- PR #1279 Add self loop check variable in graph
- PR #1277 SciPy sparse matrix input support for WCC, SCC, SSSP, and BFS
- PR #1278 Add support for shortest_path_length and fix graph vertex checks
- PR #1280 Add Multi(Di)Graph support

## Improvements
- PR #1227 Pin cmake policies to cmake 3.17 version
Expand Down
2 changes: 2 additions & 0 deletions python/cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
from cugraph.structure import (
Graph,
DiGraph,
MultiGraph,
MultiDiGraph,
from_edgelist,
from_cudf_edgelist,
from_pandas_edgelist,
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.structure.graph import Graph, DiGraph
from cugraph.structure.graph import Graph, DiGraph, MultiGraph, MultiDiGraph
from cugraph.structure.number_map import NumberMap
from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf
from cugraph.structure.convert_matrix import (from_edgelist,
Expand Down
72 changes: 44 additions & 28 deletions python/cugraph/structure/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def __init__(self, offsets, indices, value=None):
def __init__(
self,
m_graph=None,
edge_attr=None,
symmetrized=False,
bipartite=False,
multi=False,
Expand Down Expand Up @@ -113,24 +112,40 @@ def __init__(
self.batch_transposed_adjlists = None

if m_graph is not None:
if (type(self) is Graph and type(m_graph) is MultiGraph) or (
"""if (type(self) is Graph and type(m_graph) is MultiGraph) or (
Iroy30 marked this conversation as resolved.
Show resolved Hide resolved
type(self) is DiGraph and type(m_graph) is MultiDiGraph
):
self.from_cudf_edgelist(
m_graph.edgelist.edgelist_df,
source="src",
destination="dst",
edge_attr=edge_attr,
)
self.renumbered = m_graph.renumbered
self.renumber_map = m_graph.renumber_map
elist = m_graph.view_edge_list()
if m_graph.edgelist.weights:
weights = "weights"
else:
weights = None
self.from_cudf_edgelist(elist,
source = "src",
destination = "dst",
edge_attr = weights)
else:
msg = (
"Graph can be initialized using MultiGraph "
"and DiGraph can be initialized using MultiDiGraph"
)
raise Exception(msg)"""
if type(m_graph) is MultiGraph or type(m_graph) is MultiDiGraph:
elist = m_graph.view_edge_list()
if m_graph.edgelist.weights:
weights = "weights"
else:
weights = None
self.from_cudf_edgelist(elist,
source="src",
destination="dst",
edge_attr=weights)
else:
msg = (
"Graph can only be initialized using MultiGraph "
"or MultiDiGraph"
)
raise Exception(msg)
# self.number_of_vertices = None

def enable_batch(self):
client = mg_utils.get_client()
Expand Down Expand Up @@ -278,6 +293,12 @@ def is_multipartite(self):
# TO DO: Call coloring algorithm
return self.multipartite or self.bipartite

def is_multigraph(self):
"""
Returns True if the graph is a multigraph. Else returns False.
"""
return self.multi

def sets(self):
"""
Returns the bipartite set of nodes. This solely relies on the user's
Expand Down Expand Up @@ -409,24 +430,19 @@ def from_cudf_edgelist(
source_col = elist[source]
dest_col = elist[destination]

if self.multi:
if type(edge_attr) is not list:
raise Exception("edge_attr should be a list of column names")
value_col = {}
for col_name in edge_attr:
value_col[col_name] = elist[col_name]
elif edge_attr is not None:
if edge_attr is not None:
value_col = elist[edge_attr]
else:
value_col = None

if not self.symmetrized and not self.multi:
if value_col is not None:
source_col, dest_col, value_col = symmetrize(
source_col, dest_col, value_col
)
else:
source_col, dest_col = symmetrize(source_col, dest_col)
if value_col is not None:
source_col, dest_col, value_col = symmetrize(
source_col, dest_col, value_col, multi=self.multi,
symmetrize=not self.symmetrized)
else:
source_col, dest_col = symmetrize(
source_col, dest_col, multi=self.multi,
symmetrize=not self.symmetrized)
Iroy30 marked this conversation as resolved.
Show resolved Hide resolved

self.edgelist = Graph.EdgeList(source_col, dest_col, value_col)

Expand Down Expand Up @@ -727,7 +743,7 @@ def view_edge_list(self):
edgelist_df = self.unrenumber(edgelist_df, "src")
edgelist_df = self.unrenumber(edgelist_df, "dst")

if type(self) is Graph:
if type(self) is Graph or type(self) is MultiGraph:
edgelist_df = edgelist_df[edgelist_df["src"] <= edgelist_df["dst"]]
edgelist_df = edgelist_df.reset_index(drop=True)
self.edge_count = len(edgelist_df)
Expand Down Expand Up @@ -1019,7 +1035,7 @@ def number_of_edges(self, directed_edges=False):
return len(self.edgelist.edgelist_df)
if self.edge_count is None:
if self.edgelist is not None:
if type(self) is Graph:
if type(self) is Graph or type(self) is MultiGraph:
self.edge_count = len(
self.edgelist.edgelist_df[
self.edgelist.edgelist_df["src"]
Expand Down Expand Up @@ -1513,7 +1529,7 @@ def add_internal_vertex_id(
class DiGraph(Graph):
def __init__(self, m_graph=None, edge_attr=None):
Iroy30 marked this conversation as resolved.
Show resolved Hide resolved
super().__init__(
m_graph=m_graph, edge_attr=edge_attr, symmetrized=True
m_graph=m_graph, symmetrized=True
)


Expand Down
42 changes: 24 additions & 18 deletions python/cugraph/structure/symmetrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import dask_cudf


def symmetrize_df(df, src_name, dst_name):
def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True):
BradReesWork marked this conversation as resolved.
Show resolved Hide resolved
Iroy30 marked this conversation as resolved.
Show resolved Hide resolved
"""
Take a COO stored in a DataFrame, along with the column names of
the source and destination columns and create a new data frame
Expand Down Expand Up @@ -54,26 +54,30 @@ def symmetrize_df(df, src_name, dst_name):
>>> sym_ddf = cugraph.symmetrize_ddf(ddf, "src", "dst", "weight")
>>> Comms.destroy()
"""
gdf = cudf.DataFrame()

#
# Now append the columns. We add sources to the end of destinations,
# and destinations to the end of sources. Otherwise we append a
# column onto itself.
#
for idx, name in enumerate(df.columns):
if name == src_name:
gdf[src_name] = df[src_name].append(
df[dst_name], ignore_index=True
)
elif name == dst_name:
gdf[dst_name] = df[dst_name].append(
df[src_name], ignore_index=True
)
else:
gdf[name] = df[name].append(df[name], ignore_index=True)

return gdf.groupby(by=[src_name, dst_name], as_index=False).min()
if symmetrize:
BradReesWork marked this conversation as resolved.
Show resolved Hide resolved
gdf = cudf.DataFrame()
for idx, name in enumerate(df.columns):
if name == src_name:
gdf[src_name] = df[src_name].append(
df[dst_name], ignore_index=True
)
elif name == dst_name:
gdf[dst_name] = df[dst_name].append(
df[src_name], ignore_index=True
)
else:
gdf[name] = df[name].append(df[name], ignore_index=True)
else:
gdf = df
if multi:
return gdf
else:
return gdf.groupby(by=[src_name, dst_name], as_index=False).min()


def symmetrize_ddf(df, src_name, dst_name, weight_name=None):
Expand Down Expand Up @@ -129,7 +133,8 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None):
return result


def symmetrize(source_col, dest_col, value_col=None):
def symmetrize(source_col, dest_col, value_col=None, multi=False,
symmetrize=True):
Iroy30 marked this conversation as resolved.
Show resolved Hide resolved
"""
Take a COO set of source destination pairs along with associated values
stored in a single GPU or distributed
Expand Down Expand Up @@ -190,7 +195,8 @@ def symmetrize(source_col, dest_col, value_col=None):
input_df, "source", "destination", weight_name
).persist()
else:
output_df = symmetrize_df(input_df, "source", "destination")
output_df = symmetrize_df(input_df, "source", "destination", multi,
symmetrize)

if value_col is not None:
return (
Expand Down
96 changes: 96 additions & 0 deletions python/cugraph/tests/test_multigraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import cugraph
import networkx as nx
from cugraph.tests import utils
import pytest
import gc
import numpy as np


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_multigraph(graph_file):
gc.collect()
Iroy30 marked this conversation as resolved.
Show resolved Hide resolved
cuM = utils.read_csv_file(graph_file)
Iroy30 marked this conversation as resolved.
Show resolved Hide resolved
G = cugraph.MultiDiGraph()
G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")

nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
Gnx = nx.from_pandas_edgelist(
nxM,
source="0",
target="1",
edge_attr="weight",
create_using=nx.MultiDiGraph(),
)

assert G.number_of_edges() == Gnx.number_of_edges()
assert G.number_of_nodes() == Gnx.number_of_nodes()
cuedges = cugraph.to_pandas_edgelist(G)
cuedges.rename(columns={"src": "source", "dst": "target",
"weights": "weight"}, inplace=True)
cuedges["weight"] = cuedges["weight"].round(decimals=3)
nxedges = nx.to_pandas_edgelist(Gnx).astype(dtype={"source": "int32",
"target": "int32",
"weight": "float32"})
cuedges = cuedges.sort_values(by=["source", "target"]).\
reset_index(drop=True)
nxedges = nxedges.sort_values(by=["source", "target"]).\
reset_index(drop=True)
nxedges["weight"] = nxedges["weight"].round(decimals=3)
assert nxedges.equals(cuedges[["source", "target", "weight"]])


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_Graph_from_MultiGraph(graph_file):
cuM = utils.read_csv_file(graph_file)
GM = cugraph.MultiGraph()
GM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
GnxM = nx.from_pandas_edgelist(
nxM,
source="0",
target="1",
edge_attr="weight",
create_using=nx.MultiGraph(),
)

G = cugraph.Graph(GM)
Gnx = nx.Graph(GnxM)
assert Gnx.number_of_edges() == G.number_of_edges()

GdM = cugraph.MultiDiGraph()
GdM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
GnxdM = nx.from_pandas_edgelist(
nxM,
source="0",
target="1",
edge_attr="weight",
create_using=nx.MultiGraph(),
)
Gd = cugraph.DiGraph(GdM)
Gnxd = nx.DiGraph(GnxdM)
assert Gnxd.number_of_edges() == Gd.number_of_edges()


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_multigraph_sssp(graph_file):
gc.collect()
cuM = utils.read_csv_file(graph_file)
G = cugraph.MultiDiGraph()
G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
cu_paths = cugraph.sssp(G, 0)
max_val = np.finfo(cu_paths["distance"].dtype).max
cu_paths = cu_paths[cu_paths["distance"] != max_val]
nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
Gnx = nx.from_pandas_edgelist(
nxM,
source="0",
target="1",
edge_attr="weight",
create_using=nx.MultiDiGraph(),
)
nx_paths = nx.single_source_dijkstra_path_length(Gnx, 0)

cu_dist = cu_paths.sort_values(by='vertex')['distance'].to_array()
nx_dist = [i[1] for i in sorted(nx_paths.items())]

assert (cu_dist == nx_dist).all()
13 changes: 2 additions & 11 deletions python/cugraph/traversal/sssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import numpy as np

import cudf
from cugraph.structure import Graph, DiGraph
from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph
from cugraph.traversal import sssp_wrapper
from cugraph.utilities import (ensure_cugraph_obj,
is_matrix_type,
Expand Down Expand Up @@ -104,7 +104,7 @@ def _convert_df_to_output_type(df, input_type, return_predecessors):
return_predecessors is only used for return values from cupy/scipy input
types.
"""
if input_type in [Graph, DiGraph]:
if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]:
return df

elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]):
Expand Down Expand Up @@ -156,14 +156,6 @@ def sssp(G,

Parameters
----------
<<<<<<< HEAD
graph : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix Graph or
matrix object, which should contain the connectivity information. Edge
weights, if present, should be single or double precision floating
point values.
source : int
Index of the source vertex.
=======
graph : cuGraph.Graph, NetworkX.Graph, or CuPy sparse COO matrix
cuGraph graph descriptor with connectivity information. Edge weights,
if present, should be single or double precision floating point values.
Expand All @@ -175,7 +167,6 @@ def sssp(G,

If graph is an instance of a NetworkX.Graph:
str
>>>>>>> Document shortest_path_length and sssp behavior

Returns
-------
Expand Down
5 changes: 3 additions & 2 deletions python/cugraph/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,12 @@ def ensure_cugraph_obj(obj, nx_weight_attr=None, matrix_graph_type=None):
cugraph Graph-type obj to create when converting from a matrix type.
"""
# FIXME: importing here to avoid circular import
from cugraph.structure import Graph, DiGraph
from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph
from cugraph.utilities.nx_factory import convert_from_nx

input_type = type(obj)
if input_type in [Graph, DiGraph]:
print(input_type)
if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]:
return (obj, input_type)

elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]):
Expand Down