Skip to content

Commit

Permalink
Merge pull request #1280 from Iroy30/add_multigraph
Browse files Browse the repository at this point in the history
[REVIEW] enable multigraph
  • Loading branch information
BradReesWork authored Jan 21, 2021
2 parents e205fd0 + 8a5bdc3 commit da66ecf
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 56 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- PR #1279 Add self loop check variable in graph
- PR #1277 SciPy sparse matrix input support for WCC, SCC, SSSP, and BFS
- PR #1278 Add support for shortest_path_length and fix graph vertex checks
- PR #1280 Add Multi(Di)Graph support

## Improvements
- PR #1227 Pin cmake policies to cmake 3.17 version
Expand Down
2 changes: 2 additions & 0 deletions python/cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
from cugraph.structure import (
Graph,
DiGraph,
MultiGraph,
MultiDiGraph,
from_edgelist,
from_cudf_edgelist,
from_pandas_edgelist,
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.structure.graph import Graph, DiGraph
from cugraph.structure.graph import Graph, DiGraph, MultiGraph, MultiDiGraph
from cugraph.structure.number_map import NumberMap
from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf
from cugraph.structure.convert_matrix import (from_edgelist,
Expand Down
73 changes: 40 additions & 33 deletions python/cugraph/structure/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def __init__(self, offsets, indices, value=None):
def __init__(
self,
m_graph=None,
edge_attr=None,
symmetrized=False,
bipartite=False,
multi=False,
Expand Down Expand Up @@ -113,24 +112,22 @@ def __init__(
self.batch_transposed_adjlists = None

if m_graph is not None:
if (type(self) is Graph and type(m_graph) is MultiGraph) or (
type(self) is DiGraph and type(m_graph) is MultiDiGraph
):
self.from_cudf_edgelist(
m_graph.edgelist.edgelist_df,
source="src",
destination="dst",
edge_attr=edge_attr,
)
self.renumbered = m_graph.renumbered
self.renumber_map = m_graph.renumber_map
if type(m_graph) is MultiGraph or type(m_graph) is MultiDiGraph:
elist = m_graph.view_edge_list()
if m_graph.edgelist.weights:
weights = "weights"
else:
weights = None
self.from_cudf_edgelist(elist,
source="src",
destination="dst",
edge_attr=weights)
else:
msg = (
"Graph can be initialized using MultiGraph "
"and DiGraph can be initialized using MultiDiGraph"
"Graph can only be initialized using MultiGraph "
"or MultiDiGraph"
)
raise Exception(msg)
# self.number_of_vertices = None

def enable_batch(self):
client = mg_utils.get_client()
Expand Down Expand Up @@ -278,6 +275,12 @@ def is_multipartite(self):
# TO DO: Call coloring algorithm
return self.multipartite or self.bipartite

def is_multigraph(self):
"""
Returns True if the graph is a multigraph. Else returns False.
"""
return self.multi

def sets(self):
"""
Returns the bipartite set of nodes. This solely relies on the user's
Expand Down Expand Up @@ -409,24 +412,19 @@ def from_cudf_edgelist(
source_col = elist[source]
dest_col = elist[destination]

if self.multi:
if type(edge_attr) is not list:
raise Exception("edge_attr should be a list of column names")
value_col = {}
for col_name in edge_attr:
value_col[col_name] = elist[col_name]
elif edge_attr is not None:
if edge_attr is not None:
value_col = elist[edge_attr]
else:
value_col = None

if not self.symmetrized and not self.multi:
if value_col is not None:
source_col, dest_col, value_col = symmetrize(
source_col, dest_col, value_col
)
else:
source_col, dest_col = symmetrize(source_col, dest_col)
if value_col is not None:
source_col, dest_col, value_col = symmetrize(
source_col, dest_col, value_col, multi=self.multi,
symmetrize=not self.symmetrized)
else:
source_col, dest_col = symmetrize(
source_col, dest_col, multi=self.multi,
symmetrize=not self.symmetrized)

self.edgelist = Graph.EdgeList(source_col, dest_col, value_col)

Expand Down Expand Up @@ -727,7 +725,7 @@ def view_edge_list(self):
edgelist_df = self.unrenumber(edgelist_df, "src")
edgelist_df = self.unrenumber(edgelist_df, "dst")

if type(self) is Graph:
if type(self) is Graph or type(self) is MultiGraph:
edgelist_df = edgelist_df[edgelist_df["src"] <= edgelist_df["dst"]]
edgelist_df = edgelist_df.reset_index(drop=True)
self.edge_count = len(edgelist_df)
Expand Down Expand Up @@ -1019,7 +1017,7 @@ def number_of_edges(self, directed_edges=False):
return len(self.edgelist.edgelist_df)
if self.edge_count is None:
if self.edgelist is not None:
if type(self) is Graph:
if type(self) is Graph or type(self) is MultiGraph:
self.edge_count = len(
self.edgelist.edgelist_df[
self.edgelist.edgelist_df["src"]
Expand Down Expand Up @@ -1511,17 +1509,26 @@ def add_internal_vertex_id(


class DiGraph(Graph):
def __init__(self, m_graph=None, edge_attr=None):
"""
cuGraph directed graph class. Drops parallel edges.
"""
def __init__(self, m_graph=None):
super().__init__(
m_graph=m_graph, edge_attr=edge_attr, symmetrized=True
m_graph=m_graph, symmetrized=True
)


class MultiGraph(Graph):
"""
cuGraph class to create and store undirected graphs with parallel edges.
"""
def __init__(self, renumbered=True):
super().__init__(multi=True)


class MultiDiGraph(Graph):
"""
cuGraph class to create and store directed graphs with parallel edges.
"""
def __init__(self, renumbered=True):
super().__init__(symmetrized=True, multi=True)
55 changes: 37 additions & 18 deletions python/cugraph/structure/symmetrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import dask_cudf


def symmetrize_df(df, src_name, dst_name):
def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True):
"""
Take a COO stored in a DataFrame, along with the column names of
the source and destination columns and create a new data frame
Expand All @@ -42,6 +42,13 @@ def symmetrize_df(df, src_name, dst_name):
Name of the column in the data frame containing the source ids
dst_name : string
Name of the column in the data frame containing the destination ids
multi : bool
Set to True if graph is a Multi(Di)Graph. This allows multiple
edges instead of dropping them.
symmetrize : bool
Default is True to perform symmetrization. If False only duplicate
edges are dropped.
Examples
--------
>>> import cugraph.dask as dcg
Expand All @@ -54,26 +61,30 @@ def symmetrize_df(df, src_name, dst_name):
>>> sym_ddf = cugraph.symmetrize_ddf(ddf, "src", "dst", "weight")
>>> Comms.destroy()
"""
gdf = cudf.DataFrame()

#
# Now append the columns. We add sources to the end of destinations,
# and destinations to the end of sources. Otherwise we append a
# column onto itself.
#
for idx, name in enumerate(df.columns):
if name == src_name:
gdf[src_name] = df[src_name].append(
df[dst_name], ignore_index=True
)
elif name == dst_name:
gdf[dst_name] = df[dst_name].append(
df[src_name], ignore_index=True
)
else:
gdf[name] = df[name].append(df[name], ignore_index=True)

return gdf.groupby(by=[src_name, dst_name], as_index=False).min()
if symmetrize:
gdf = cudf.DataFrame()
for idx, name in enumerate(df.columns):
if name == src_name:
gdf[src_name] = df[src_name].append(
df[dst_name], ignore_index=True
)
elif name == dst_name:
gdf[dst_name] = df[dst_name].append(
df[src_name], ignore_index=True
)
else:
gdf[name] = df[name].append(df[name], ignore_index=True)
else:
gdf = df
if multi:
return gdf
else:
return gdf.groupby(by=[src_name, dst_name], as_index=False).min()


def symmetrize_ddf(df, src_name, dst_name, weight_name=None):
Expand Down Expand Up @@ -105,6 +116,12 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None):
Name of the column in the data frame containing the source ids
dst_name : string
Name of the column in the data frame containing the destination ids
multi : bool
Set to True if graph is a Multi(Di)Graph. This allows multiple
edges instead of dropping them.
symmetrize : bool
Default is True to perform symmetrization. If False only duplicate
edges are dropped.
Examples
--------
Expand All @@ -129,7 +146,8 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None):
return result


def symmetrize(source_col, dest_col, value_col=None):
def symmetrize(source_col, dest_col, value_col=None, multi=False,
symmetrize=True):
"""
Take a COO set of source destination pairs along with associated values
stored in a single GPU or distributed
Expand Down Expand Up @@ -190,7 +208,8 @@ def symmetrize(source_col, dest_col, value_col=None):
input_df, "source", "destination", weight_name
).persist()
else:
output_df = symmetrize_df(input_df, "source", "destination")
output_df = symmetrize_df(input_df, "source", "destination", multi,
symmetrize)

if value_col is not None:
return (
Expand Down
104 changes: 104 additions & 0 deletions python/cugraph/tests/test_multigraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import cugraph
import networkx as nx
from cugraph.tests import utils
import pytest
import gc
import numpy as np


# =============================================================================
# Pytest Setup / Teardown - called for each test function
# =============================================================================
def setup_function():
gc.collect()


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_multigraph(graph_file):
# FIXME: Migrate to new test fixtures for Graph setup once available
cuM = utils.read_csv_file(graph_file)
G = cugraph.MultiDiGraph()
G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")

nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
Gnx = nx.from_pandas_edgelist(
nxM,
source="0",
target="1",
edge_attr="weight",
create_using=nx.MultiDiGraph(),
)

assert G.number_of_edges() == Gnx.number_of_edges()
assert G.number_of_nodes() == Gnx.number_of_nodes()
cuedges = cugraph.to_pandas_edgelist(G)
cuedges.rename(columns={"src": "source", "dst": "target",
"weights": "weight"}, inplace=True)
cuedges["weight"] = cuedges["weight"].round(decimals=3)
nxedges = nx.to_pandas_edgelist(Gnx).astype(dtype={"source": "int32",
"target": "int32",
"weight": "float32"})
cuedges = cuedges.sort_values(by=["source", "target"]).\
reset_index(drop=True)
nxedges = nxedges.sort_values(by=["source", "target"]).\
reset_index(drop=True)
nxedges["weight"] = nxedges["weight"].round(decimals=3)
assert nxedges.equals(cuedges[["source", "target", "weight"]])


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_Graph_from_MultiGraph(graph_file):
# FIXME: Migrate to new test fixtures for Graph setup once available
cuM = utils.read_csv_file(graph_file)
GM = cugraph.MultiGraph()
GM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
GnxM = nx.from_pandas_edgelist(
nxM,
source="0",
target="1",
edge_attr="weight",
create_using=nx.MultiGraph(),
)

G = cugraph.Graph(GM)
Gnx = nx.Graph(GnxM)
assert Gnx.number_of_edges() == G.number_of_edges()

GdM = cugraph.MultiDiGraph()
GdM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
GnxdM = nx.from_pandas_edgelist(
nxM,
source="0",
target="1",
edge_attr="weight",
create_using=nx.MultiGraph(),
)
Gd = cugraph.DiGraph(GdM)
Gnxd = nx.DiGraph(GnxdM)
assert Gnxd.number_of_edges() == Gd.number_of_edges()


@pytest.mark.parametrize("graph_file", utils.DATASETS)
def test_multigraph_sssp(graph_file):
# FIXME: Migrate to new test fixtures for Graph setup once available
cuM = utils.read_csv_file(graph_file)
G = cugraph.MultiDiGraph()
G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
cu_paths = cugraph.sssp(G, 0)
max_val = np.finfo(cu_paths["distance"].dtype).max
cu_paths = cu_paths[cu_paths["distance"] != max_val]
nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
Gnx = nx.from_pandas_edgelist(
nxM,
source="0",
target="1",
edge_attr="weight",
create_using=nx.MultiDiGraph(),
)
nx_paths = nx.single_source_dijkstra_path_length(Gnx, 0)

cu_dist = cu_paths.sort_values(by='vertex')['distance'].to_array()
nx_dist = [i[1] for i in sorted(nx_paths.items())]

assert (cu_dist == nx_dist).all()
4 changes: 2 additions & 2 deletions python/cugraph/traversal/sssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import numpy as np

import cudf
from cugraph.structure import Graph, DiGraph
from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph
from cugraph.traversal import sssp_wrapper
from cugraph.utilities import (ensure_cugraph_obj,
is_matrix_type,
Expand Down Expand Up @@ -104,7 +104,7 @@ def _convert_df_to_output_type(df, input_type, return_predecessors):
return_predecessors is only used for return values from cupy/scipy input
types.
"""
if input_type in [Graph, DiGraph]:
if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]:
return df

elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]):
Expand Down
Loading

0 comments on commit da66ecf

Please sign in to comment.