Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update unit tests to leverage the datasets API #2733

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
bed80ad
update raft import
jnke2016 Sep 24, 2022
d4e8c61
reset changes to the yml files
jnke2016 Sep 24, 2022
101f35b
fix typo
jnke2016 Sep 24, 2022
c3950c3
Merge remote-tracking branch 'upstream/branch-22.10_fix-raft-import' …
jnke2016 Sep 25, 2022
5f13edb
add karate-disjoint to the datasets API
jnke2016 Sep 25, 2022
5de6972
update tests to leverage the datasets API
jnke2016 Sep 25, 2022
4c6e46e
remove excessive spacing
jnke2016 Sep 25, 2022
fed0be3
update test to leverage the datasets API
jnke2016 Sep 25, 2022
fa32e1b
add 'DATASETS_UNRENUMBERED' to the datasets API
jnke2016 Sep 25, 2022
162de8c
fix style
jnke2016 Sep 25, 2022
12a8f0b
add new datasets to the datasets API
jnke2016 Oct 4, 2022
c04b093
update test to leverage the datasets API
jnke2016 Oct 4, 2022
5f3f6e8
fetch latest changes
jnke2016 Oct 4, 2022
b02a9d5
clean code
jnke2016 Oct 4, 2022
9e59b14
enable retrieval of the dataset path without having to create the edg…
jnke2016 Oct 4, 2022
ce1f4ba
add new datasets to the datasets API
jnke2016 Oct 4, 2022
b5492e4
fix bug
jnke2016 Oct 4, 2022
47b2231
update test to leverage the datasets API
jnke2016 Oct 4, 2022
4027a00
add 'header' as an option when converting a csv to cudf
jnke2016 Oct 5, 2022
607e58b
add 'header' as an option when converting a csv to cudf
jnke2016 Oct 5, 2022
0be13b1
add 'header' as an option when converting a csv to cudf
jnke2016 Oct 5, 2022
4942f29
update test to leverage the datasets API
jnke2016 Oct 5, 2022
318ac8e
update test to leverage the datasets API
jnke2016 Oct 5, 2022
824b71d
clean file
jnke2016 Oct 5, 2022
d05dbee
update test to leverage the datasets API
jnke2016 Oct 5, 2022
e77f259
fix typo
jnke2016 Oct 5, 2022
4c7c74c
create directed graph where required
jnke2016 Oct 5, 2022
6151c41
remove outdated test
jnke2016 Oct 5, 2022
770f59c
change the retrieval of the dataset path
jnke2016 Oct 5, 2022
721f8ca
remove unused import
jnke2016 Oct 5, 2022
6cafa52
Merge remote-tracking branch 'upstream/branch-22.10' into branch-22.1…
jnke2016 Oct 5, 2022
cc0c2df
update notebook
jnke2016 Oct 5, 2022
c48abcf
add new line
jnke2016 Oct 5, 2022
26b769a
revert end of line
jnke2016 Oct 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions notebooks/algorithms/structure/Renumber-2.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@
"outputs": [],
"source": [
"# Since IP columns are strings, we first need to convert them to integers\n",
"gdf['src_ip'] = gdf['src'].str.ip2int()\n",
"gdf['dst_ip'] = gdf['dst'].str.ip2int()"
"gdf['src_ip'] = gdf['srcip'].str.ip2int()\n",
"gdf['dst_ip'] = gdf['dstip'].str.ip2int()"
]
},
{
Expand Down Expand Up @@ -253,4 +253,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
22 changes: 18 additions & 4 deletions python/cugraph/cugraph/experimental/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,31 @@
karate_data = Dataset(meta_path / "karate_data.yaml")
karate_undirected = Dataset(meta_path / "karate_undirected.yaml")
karate_asymmetric = Dataset(meta_path / "karate_asymmetric.yaml")
karate_disjoint = Dataset(meta_path / "karate-disjoint.yaml")
dolphins = Dataset(meta_path / "dolphins.yaml")
polbooks = Dataset(meta_path / "polbooks.yaml")
netscience = Dataset(meta_path / "netscience.yaml")
cyber = Dataset(meta_path / "cyber.yaml")
small_line = Dataset(meta_path / "small_line.yaml")
small_tree = Dataset(meta_path / "small_tree.yaml")
toy_graph = Dataset(meta_path / "toy_graph.yaml")
toy_graph_undirected = Dataset(meta_path / "toy_graph_undirected.yaml")
email_Eu_core = Dataset(meta_path / "email-Eu-core.yaml")
ktruss_polbooks = Dataset(meta_path / "ktruss_polbooks.yaml")

DATASETS_UNDIRECTED = [karate, dolphins]

DATASETS_UNDIRECTED_WEIGHTS = [netscience]

DATASETS_UNRENUMBERED = [karate_disjoint]

DATASETS = [dolphins, netscience, karate_disjoint]

DATASETS_SMALL = [karate, dolphins, polbooks]

STRONGDATASETS = [dolphins, netscience, email_Eu_core]

DATASETS_KTRUSS = [(polbooks, ktruss_polbooks)]

MEDIUM_DATASETS = [polbooks]

Expand All @@ -51,7 +69,3 @@
small_line, small_tree]

TEST_GROUP = [dolphins, netscience]

DATASETS_KTRUSS = [polbooks]

DATASETS_UNDIRECTED = [karate_undirected, small_line, karate_asymmetric]
22 changes: 13 additions & 9 deletions python/cugraph/cugraph/experimental/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def __init__(self, meta_data_file_name):
self._edgelist = None
self._graph = None
self._path = None
"""
self._path = self._dl_path.path / (self.metadata['name'] +
self.metadata['file_type'])
"""

def __download_csv(self, url):
self._dl_path.path.mkdir(parents=True, exist_ok=True)
Expand All @@ -98,22 +102,22 @@ def get_edgelist(self, fetch=False):
"""

if self._edgelist is None:
full_path = self._dl_path.path / (self.metadata['name'] +
self.metadata['file_type'])

full_path = self.get_path()
if not full_path.is_file():
if fetch:
self.__download_csv(self.metadata['url'])
else:
raise RuntimeError(f"The datafile {full_path} does not"
" exist. Try get_edgelist(fetch=True)"
" to download the datafile")

header = None
if isinstance(self.metadata['header'], int):
header = self.metadata['header']
self._edgelist = cudf.read_csv(full_path,
delimiter=self.metadata['delim'],
names=self.metadata['col_names'],
dtype=self.metadata['col_types'])
self._path = full_path
dtype=self.metadata['col_types'],
header=header)

return self._edgelist

Expand Down Expand Up @@ -144,6 +148,7 @@ def get_graph(self, fetch=False, create_using=Graph, ignore_weights=False):
if create_using is None:
self._graph = Graph()
elif isinstance(create_using, Graph):
# what about BFS if transposed is True
attrs = {"directed": create_using.is_directed()}
self._graph = type(create_using)(**attrs)
elif type(create_using) is type:
Expand All @@ -166,9 +171,8 @@ def get_path(self):
"""
Returns the location of the stored dataset file
"""
if self._path is None:
raise RuntimeError("Path to datafile has not been set." +
" Call get_edgelist or get_graph first")
self._path = self._dl_path.path / (self.metadata['name'] +
self.metadata['file_type'])

return self._path.absolute()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,18 @@ url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/cy
refs: N/A
col_names:
- idx
- src
- dst
- srcip
- dstip
col_types:
- int32
- str
- str
delim: ","
header: 0
has_loop: true
is_directed: true
is_multigraph: false
is_symmetric: false
number_of_edges: 54
number_of_nodes: 314
number_of_edges: 2546575
number_of_nodes: 706529
number_of_lines: 2546576
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ col_types:
- int32
- float32
delim: " "
header: None
has_loop: false
is_directed: true
is_multigraph: false
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: email-Eu-core
file_type: .csv
author: null
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/email-Eu-core.csv
refs: null
delim: " "
header: None
col_names:
- src
- dst
- wgt
col_types:
- int32
- int32
- float32
has_loop: false
is_directed: false
is_multigraph: false
is_symmetric: true
number_of_edges: 25571
number_of_nodes: 1005
number_of_lines: 25571
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: karate-disjoint
file_type: .csv
author: null
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/karate-disjoint.csv
refs: null
delim: " "
header: None
col_names:
- src
- dst
- wgt
col_types:
- int32
- int32
- float32
has_loop: false
is_directed: True
is_multigraph: false
is_symmetric: true
number_of_edges: 312
number_of_nodes: 68
number_of_lines: 312
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ refs:
W. W. Zachary, An information flow model for conflict and fission in small groups,
Journal of Anthropological Research 33, 452-473 (1977).
delim: " "
header: None
col_names:
- src
- dst
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ name: karate-asymmetric
file_type: .csv
author: Zachary W.
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/karate-asymmetric.csv
delim: " "
header: None
refs:
W. W. Zachary, An information flow model for conflict and fission in small groups,
Journal of Anthropological Research 33, 452-473 (1977).
delim: "\t"
col_names:
- src
- dst
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ refs:
W. W. Zachary, An information flow model for conflict and fission in small groups,
Journal of Anthropological Research 33, 452-473 (1977).
delim: "\t"
header: None
col_names:
- src
- dst
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ refs:
W. W. Zachary, An information flow model for conflict and fission in small groups,
Journal of Anthropological Research 33, 452-473 (1977).
delim: "\t"
header: None
col_names:
- src
- dst
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
name: ktruss_polbooks
file_type: .csv
author: null
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/ref/ktruss/polbooks.csv
refs: null
delim: " "
header: None
col_names:
- src
- dst
- wgt
col_types:
- int32
- int32
- float32
has_loop: false
is_directed: true
is_multigraph: false
is_symmetric: false
number_of_edges: 233
number_of_nodes: 58
number_of_lines: 233

Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ author: Newman, Mark EJ
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/netscience.csv
refs: Finding community structure in networks using the eigenvectors of matrices.
delim: " "
header: None
col_names:
- src
- dst
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ author: V. Krebs
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/polbooks.csv
refs: null
delim: " "
header: None
col_names:
- src
- dst
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ author: null
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/small_line.csv
refs: null
delim: " "
header: None
col_names:
- src
- dst
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ author: null
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/small_tree.csv
refs: null
delim: " "
header: None
col_names:
- src
- dst
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: toy_graph
file_type: .csv
author: null
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/toy_graph.csv
refs: null
delim: " "
header: None
col_names:
- src
- dst
- wgt
col_types:
- int32
- int32
- float32
has_loop: false
is_directed: false
is_multigraph: false
is_symmetric: true
number_of_edges: 16
number_of_nodes: 6
number_of_lines: 16
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: toy_graph_undirected
file_type: .csv
author: null
url: https://raw.githubusercontent.com/rapidsai/cugraph/branch-22.08/datasets/toy_graph_undirected.csv
refs: null
delim: " "
header: None
col_names:
- src
- dst
- wgt
col_types:
- int32
- int32
- float32
has_loop: false
is_directed: false
is_multigraph: false
is_symmetric: true
number_of_edges: 8
number_of_nodes: 6
number_of_lines: 8
Loading