From 77a08a672d5d48e664646db802fa954e120df9b8 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 10 Oct 2024 06:12:24 +0000 Subject: [PATCH 01/11] Make the fixture polygons valid --- python/cuspatial/cuspatial/tests/conftest.py | 93 ++++++-------------- 1 file changed, 27 insertions(+), 66 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/conftest.py b/python/cuspatial/cuspatial/tests/conftest.py index caa53dcea..ae7166773 100644 --- a/python/cuspatial/cuspatial/tests/conftest.py +++ b/python/cuspatial/cuspatial/tests/conftest.py @@ -27,38 +27,38 @@ def gs(): g6 = MultiLineString((((23, 24), (25, 26)), ((27, 28), (29, 30)))) g7 = LineString(((31, 32), (33, 34))) g8 = Polygon( - ((35, 36), (37, 38), (39, 40), (41, 42)), + ((35, 36), (38, 36), (41, 39), (41, 42)), ) g9 = MultiPolygon( [ ( - ((43, 44), (45, 46), (47, 48)), - [((49, 50), (51, 52), (53, 54))], + ((43, 44), (48, 44), (47, 48)), + [((45, 45), (46, 46), (47, 45))], ), ( - ((55, 56), (57, 58), (59, 60)), - [((61, 62), (63, 64), (65, 66))], + ((55, 56), (60, 56), (59, 60)), + [((57, 57), (58, 58), (59, 57))], ), ] ) g10 = MultiPolygon( [ ( - ((67, 68), (69, 70), (71, 72)), - [((73, 74), (75, 76), (77, 78))], + ((67, 68), (72, 68), (71, 72)), + [((69, 69), (70, 70), (71, 69))], ), ( - ((79, 80), (81, 82), (83, 84)), + ((79, 80), (90, 82), (83, 90)), [ - ((85, 86), (87, 88), (89, 90)), - ((91, 92), (93, 94), (95, 96)), + ((80, 81), (82, 84), (84, 82)), + ((85, 85), (88, 82), (86, 82)), ], ), ] ) g11 = Polygon( ((97, 98), (99, 101), (102, 103), (101, 108)), - [((106, 107), (108, 109), (110, 111), (113, 108))], + [((99, 102), (100, 103), (101, 103), (100, 102))], ) gs = gpd.GeoSeries([g0, g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, g11]) return gs @@ -70,8 +70,10 @@ def gpdf(gs): random_col = int_col np.random.shuffle(random_col) str_col = [str(x) for x in int_col] - key_col = np.repeat(np.arange(4), len(int_col) // 4) + key_col = np.repeat(np.arange(4), (len(int_col) // 4) + 1) + key_col = key_col[: len(int_col)] np.random.shuffle(key_col) + result = gpd.GeoDataFrame( { "geometry": gs, @@ -87,60 +89,19 @@ def gpdf(gs): @pytest.fixture def polys(): - return np.array( - ( - (35, 36), - (37, 38), - (39, 40), - (41, 42), - (35, 36), - (43, 44), - (45, 46), - (47, 48), - (43, 44), - (49, 50), - (51, 52), - (53, 54), - (49, 50), - (55, 56), - (57, 58), - (59, 60), - (55, 56), - (61, 62), - (63, 64), - (65, 66), - (61, 62), - (67, 68), - (69, 70), - (71, 72), - (67, 68), - (73, 74), - (75, 76), - (77, 78), - (73, 74), - (79, 80), - (81, 82), - (83, 84), - (79, 80), - (85, 86), - (87, 88), - (89, 90), - (85, 86), - (91, 92), - (93, 94), - (95, 96), - (91, 92), - (97, 98), - (99, 101), - (102, 103), - (101, 108), - (97, 98), - (106, 107), - (108, 109), - (110, 111), - (113, 108), - (106, 107), - ) + return np.array(( + (35, 36), (38, 36), (41, 39), (41, 42), (35, 36), + (43, 44), (48, 44), (47, 48), (43, 44), + (45, 45), (46, 46), (47, 45), (45, 45), + (55, 56), (60, 56), (59, 60), (55, 56), + (57, 57), (58, 58), (59, 57), (57, 57), + (67, 68), (72, 68), (71, 72), (67, 68), + (69, 69), (70, 70), (71, 69), (69, 69), + (79, 80), (90, 82), (83, 90), (79, 80), + (80, 81), (82, 84), (84, 82), (80, 81), + (85, 85), (88, 82), (86, 82), (85, 85), + (97, 98), (99, 101), (102, 103), (101, 108), (97, 98), + (99, 102), (100, 103), (101, 103), (100, 102), (99, 102)) ) From 800ee74e0f2a28552012d01283bc1569a8bc3ddd Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 10 Oct 2024 06:12:56 +0000 Subject: [PATCH 02/11] Fix `test_from_geoseries_complex` to not depend on specific test fixture data values. --- .../cuspatial/cuspatial/tests/test_from_geopandas.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/test_from_geopandas.py b/python/cuspatial/cuspatial/tests/test_from_geopandas.py index 1c33e216a..a02ab4a35 100644 --- a/python/cuspatial/cuspatial/tests/test_from_geopandas.py +++ b/python/cuspatial/cuspatial/tests/test_from_geopandas.py @@ -36,15 +36,8 @@ def test_dataframe_column_access(gs): def test_from_geoseries_complex(gs): cugs = cuspatial.from_geopandas(gs) - assert cugs.points.xy.sum() == 18 - assert cugs.lines.xy.sum() == 540 - assert cugs.multipoints.xy.sum() == 36 - assert cugs.polygons.xy.sum() == 7436 - assert cugs._column.polygons._column.base_children[0].sum() == 15 - assert ( - cugs._column.polygons._column.base_children[1].base_children[0].sum() - == 38 - ) + gs_roundtrip = cugs.to_geopandas() + gpd.testing.assert_geoseries_equal(gs_roundtrip, gs) def test_from_geopandas_point(): From 5a56633edeb4dc820f2b7a6e67e77b95d9aec8ef Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 10 Oct 2024 06:23:04 +0000 Subject: [PATCH 03/11] Remove xfails --- python/cuspatial/cuspatial/tests/test_cudf_integration.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/test_cudf_integration.py b/python/cuspatial/cuspatial/tests/test_cudf_integration.py index 41214e816..c342afef1 100644 --- a/python/cuspatial/cuspatial/tests/test_cudf_integration.py +++ b/python/cuspatial/cuspatial/tests/test_cudf_integration.py @@ -6,13 +6,7 @@ import cuspatial -reason = ( - "gs fixture contains invalid Polygons/MultiPolygons: " - "https://github.com/libgeos/geos/issues/1177" -) - -@pytest.mark.xfail(reason=reason) def test_sort_index_series(gs): gs.index = np.random.permutation(len(gs)) cugs = cuspatial.from_geopandas(gs) @@ -21,7 +15,6 @@ def test_sort_index_series(gs): gpd.testing.assert_geoseries_equal(got, expected) -@pytest.mark.xfail(reason=reason) def test_sort_index_dataframe(gpdf): gpdf.index = np.random.permutation(len(gpdf)) cugpdf = cuspatial.from_geopandas(gpdf) @@ -30,7 +23,6 @@ def test_sort_index_dataframe(gpdf): gpd.testing.assert_geodataframe_equal(got, expected) -@pytest.mark.xfail(reason=reason) def test_sort_values(gpdf): cugpdf = cuspatial.from_geopandas(gpdf) expected = gpdf.sort_values("random") From db356705b9d60c351859b0fa762435b5b1361791 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 15 Oct 2024 05:08:59 +0000 Subject: [PATCH 04/11] Style --- python/cuspatial/cuspatial/tests/conftest.py | 67 +++++++++++++++---- .../cuspatial/tests/test_cudf_integration.py | 1 - 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/conftest.py b/python/cuspatial/cuspatial/tests/conftest.py index 7d42bf372..9755d4a4f 100644 --- a/python/cuspatial/cuspatial/tests/conftest.py +++ b/python/cuspatial/cuspatial/tests/conftest.py @@ -91,19 +91,60 @@ def gpdf(gs): @pytest.fixture def polys(): - return np.array(( - (35, 36), (38, 36), (41, 39), (41, 42), (35, 36), - (43, 44), (48, 44), (47, 48), (43, 44), - (45, 45), (46, 46), (47, 45), (45, 45), - (55, 56), (60, 56), (59, 60), (55, 56), - (57, 57), (58, 58), (59, 57), (57, 57), - (67, 68), (72, 68), (71, 72), (67, 68), - (69, 69), (70, 70), (71, 69), (69, 69), - (79, 80), (90, 82), (83, 90), (79, 80), - (80, 81), (82, 84), (84, 82), (80, 81), - (85, 85), (88, 82), (86, 82), (85, 85), - (97, 98), (99, 101), (102, 103), (101, 108), (97, 98), - (99, 102), (100, 103), (101, 103), (100, 102), (99, 102)) + return np.array( + ( + (35, 36), + (38, 36), + (41, 39), + (41, 42), + (35, 36), + (43, 44), + (48, 44), + (47, 48), + (43, 44), + (45, 45), + (46, 46), + (47, 45), + (45, 45), + (55, 56), + (60, 56), + (59, 60), + (55, 56), + (57, 57), + (58, 58), + (59, 57), + (57, 57), + (67, 68), + (72, 68), + (71, 72), + (67, 68), + (69, 69), + (70, 70), + (71, 69), + (69, 69), + (79, 80), + (90, 82), + (83, 90), + (79, 80), + (80, 81), + (82, 84), + (84, 82), + (80, 81), + (85, 85), + (88, 82), + (86, 82), + (85, 85), + (97, 98), + (99, 101), + (102, 103), + (101, 108), + (97, 98), + (99, 102), + (100, 103), + (101, 103), + (100, 102), + (99, 102), + ) ) diff --git a/python/cuspatial/cuspatial/tests/test_cudf_integration.py b/python/cuspatial/cuspatial/tests/test_cudf_integration.py index c342afef1..0e67733c0 100644 --- a/python/cuspatial/cuspatial/tests/test_cudf_integration.py +++ b/python/cuspatial/cuspatial/tests/test_cudf_integration.py @@ -2,7 +2,6 @@ import geopandas as gpd import numpy as np import pandas as pd -import pytest import cuspatial From 3061d88f21858293e9906648cc1079d7ae28bca8 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 15 Oct 2024 05:09:54 +0000 Subject: [PATCH 05/11] Better polygon slicing test --- .../cuspatial/tests/test_geoseries.py | 117 +++--------------- 1 file changed, 20 insertions(+), 97 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index 56f61158e..a7f0b3b02 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -401,104 +401,27 @@ def test_geometry_linestring_slicing(gs): def test_geometry_polygon_slicing(gs): + from itertools import chain + + from shapely import get_coordinates + + geom = gs + polys_list = geom[ + geom.apply(lambda x: isinstance(x, (MultiPolygon, Polygon))) + ] + polys = list( + chain(polys_list.apply(get_coordinates)) + ) # flatten multipolygons + coords_list = list(chain(*polys)) # flatten shells and holes + xy_interleaved = list(chain(*coords_list)) # flatten coordinates + x = xy_interleaved[::2] + y = xy_interleaved[1::2] + cugs = cuspatial.from_geopandas(gs) - assert (cugs[:9].polygons.x == cudf.Series([35, 37, 39, 41, 35])).all() - assert (cugs[:9].polygons.y == cudf.Series([36, 38, 40, 42, 36])).all() - assert ( - cugs[:9].polygons.xy - == cudf.Series([35, 36, 37, 38, 39, 40, 41, 42, 35, 36]) - ).all() - assert ( - cugs[:10].polygons.x - == cudf.Series( - [ - 35, - 37, - 39, - 41, - 35, - 43, - 45, - 47, - 43, - 49, - 51, - 53, - 49, - 55, - 57, - 59, - 55, - 61, - 63, - 65, - 61, - ] - ) - ).all() - assert ( - cugs[:10].polygons.y - == cudf.Series( - [ - 36, - 38, - 40, - 42, - 36, - 44, - 46, - 48, - 44, - 50, - 52, - 54, - 50, - 56, - 58, - 60, - 56, - 62, - 64, - 66, - 62, - ] - ) - ).all() - assert ( - cugs[11:].polygons.x - == cudf.Series([97, 99, 102, 101, 97, 106, 108, 110, 113, 106]) - ).all() - assert ( - cugs[11:].polygons.y - == cudf.Series([98, 101, 103, 108, 98, 107, 109, 111, 108, 107]) - ).all() - assert ( - cugs[11:].polygons.xy - == cudf.Series( - [ - 97, - 98, - 99, - 101, - 102, - 103, - 101, - 108, - 97, - 98, - 106, - 107, - 108, - 109, - 110, - 111, - 113, - 108, - 106, - 107, - ] - ) - ).all() + + assert (cugs.polygons.x == cudf.Series(x)).all() + assert (cugs.polygons.y == cudf.Series(y)).all() + assert (cugs.polygons.xy == cudf.Series(xy_interleaved)).all() def test_loc(gs): From be83c2aff1c79d5ab9ab5a7768652d732ac048db Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 15 Oct 2024 05:30:02 +0000 Subject: [PATCH 06/11] Better point and line slicing tests --- .../cuspatial/tests/test_geoseries.py | 79 +++++++------------ 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index a7f0b3b02..fbe8f940e 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -1,6 +1,7 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. from enum import Enum +from itertools import chain from numbers import Integral import cupy as cp @@ -9,6 +10,7 @@ import pandas as pd import pytest from geopandas.testing import assert_geoseries_equal +from shapely import get_coordinates from shapely.affinity import rotate from shapely.geometry import ( LineString, @@ -350,65 +352,42 @@ def test_size(gs, series_slice): assert len(gi) == len(cugs) -def test_geometry_point_slicing(gs): - cugs = cuspatial.from_geopandas(gs) - assert (cugs[:1].points.x == cudf.Series([-1])).all() - assert (cugs[:1].points.y == cudf.Series([0])).all() - assert (cugs[:1].points.xy == cudf.Series([-1, 0])).all() - assert (cugs[3:].points.x == cudf.Series([9])).all() - assert (cugs[3:].points.y == cudf.Series([10])).all() - assert (cugs[3:].points.xy == cudf.Series([9, 10])).all() - assert (cugs[0:4].points.x == cudf.Series([-1, 9])).all() - assert (cugs[0:4].points.y == cudf.Series([0, 10])).all() - assert (cugs[0:4].points.xy == cudf.Series([-1, 0, 9, 10])).all() - - def test_geometry_multipoint_slicing(gs): + points_list = gs[gs.apply(lambda x: isinstance(x, (MultiPoint, Point)))] + points = list( + chain(points_list.apply(get_coordinates)) + ) # flatten multilinestrings/linestrings + coords_list = list(chain(*points)) # flatten linestrings + xy_interleaved = list(chain(*coords_list)) # flatten coordinates + x = xy_interleaved[::2] + y = xy_interleaved[1::2] + cugs = cuspatial.from_geopandas(gs) - assert (cugs[:2].multipoints.x == cudf.Series([1, 3])).all() - assert (cugs[:2].multipoints.y == cudf.Series([2, 4])).all() - assert (cugs[:2].multipoints.xy == cudf.Series([1, 2, 3, 4])).all() - assert (cugs[2:].multipoints.x == cudf.Series([5, 7])).all() - assert (cugs[2:].multipoints.y == cudf.Series([6, 8])).all() - assert (cugs[2:].multipoints.xy == cudf.Series([5, 6, 7, 8])).all() - assert (cugs[0:4].multipoints.x == cudf.Series([1, 3, 5, 7])).all() - assert (cugs[0:4].multipoints.y == cudf.Series([2, 4, 6, 8])).all() - assert ( - cugs[0:4].multipoints.xy == cudf.Series([1, 2, 3, 4, 5, 6, 7, 8]) - ).all() + assert (cugs.points.x == cudf.Series(x)).all() + assert (cugs.points.y == cudf.Series(y)).all() + assert (cugs.points.xy == cudf.Series(xy_interleaved)).all() def test_geometry_linestring_slicing(gs): + lines_list = gs[ + gs.apply(lambda x: isinstance(x, (MultiLineString, LineString))) + ] + lines = list( + chain(lines_list.apply(get_coordinates)) + ) # flatten multilinestrings/linestrings + coords_list = list(chain(*lines)) # flatten linestrings + xy_interleaved = list(chain(*coords_list)) # flatten coordinates + x = xy_interleaved[::2] + y = xy_interleaved[1::2] + cugs = cuspatial.from_geopandas(gs) - assert (cugs[:5].lines.x == cudf.Series([11, 13])).all() - assert (cugs[:5].lines.y == cudf.Series([12, 14])).all() - assert (cugs[:5].lines.xy == cudf.Series([11, 12, 13, 14])).all() - assert (cugs[:6].lines.x == cudf.Series([11, 13, 15, 17, 19, 21])).all() - assert (cugs[:6].lines.y == cudf.Series([12, 14, 16, 18, 20, 22])).all() - assert ( - cugs[:6].lines.xy - == cudf.Series([11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]) - ).all() - assert (cugs[7:].lines.x == cudf.Series([31, 33])).all() - assert (cugs[7:].lines.y == cudf.Series([32, 34])).all() - assert (cugs[7:].lines.xy == cudf.Series([31, 32, 33, 34])).all() - assert (cugs[6:].lines.x == cudf.Series([23, 25, 27, 29, 31, 33])).all() - assert (cugs[6:].lines.y == cudf.Series([24, 26, 28, 30, 32, 34])).all() - assert ( - cugs[6:].lines.xy - == cudf.Series([23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]) - ).all() + assert (cugs.lines.x == cudf.Series(x)).all() + assert (cugs.lines.y == cudf.Series(y)).all() + assert (cugs.lines.xy == cudf.Series(xy_interleaved)).all() def test_geometry_polygon_slicing(gs): - from itertools import chain - - from shapely import get_coordinates - - geom = gs - polys_list = geom[ - geom.apply(lambda x: isinstance(x, (MultiPolygon, Polygon))) - ] + polys_list = gs[gs.apply(lambda x: isinstance(x, (MultiPolygon, Polygon)))] polys = list( chain(polys_list.apply(get_coordinates)) ) # flatten multipolygons From 79f77c6fd37852607a67f0be206e45bb8ffeaad6 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 16 Oct 2024 04:29:12 +0000 Subject: [PATCH 07/11] Fix up point ordering in fixture, delete stale comment --- python/cuspatial/cuspatial/tests/conftest.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/conftest.py b/python/cuspatial/cuspatial/tests/conftest.py index 9755d4a4f..401bf3110 100644 --- a/python/cuspatial/cuspatial/tests/conftest.py +++ b/python/cuspatial/cuspatial/tests/conftest.py @@ -19,9 +19,9 @@ @pytest.fixture def gs(): g0 = Point(-1, 0) - g1 = MultiPoint(((1, 2), (3, 4))) - g2 = MultiPoint(((5, 6), (7, 8))) - g3 = Point(9, 10) + g1 = Point(9, 10) + g2 = MultiPoint(((1, 2), (3, 4))) + g3 = MultiPoint(((5, 6), (7, 8))) g4 = LineString(((11, 12), (13, 14))) g5 = MultiLineString((((15, 16), (17, 18)), ((19, 20), (21, 22)))) g6 = MultiLineString((((23, 24), (25, 26)), ((27, 28), (29, 30)))) @@ -29,8 +29,6 @@ def gs(): g8 = Polygon( ((35, 36), (38, 36), (41, 39), (41, 42)), ) - # TODO: g9, g10, g11 are invalid - # https://github.com/libgeos/geos/issues/1177 g9 = MultiPolygon( [ ( From 5b36ed0b1985bee0e7e0ea667e3d5c82a3fda1fa Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 16 Oct 2024 04:29:33 +0000 Subject: [PATCH 08/11] Fix geometry slicing tests to actually slice --- .../cuspatial/tests/test_geoseries.py | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index fbe8f940e..c9259ece4 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -352,35 +352,54 @@ def test_size(gs, series_slice): assert len(gi) == len(cugs) -def test_geometry_multipoint_slicing(gs): - points_list = gs[gs.apply(lambda x: isinstance(x, (MultiPoint, Point)))] +def test_geometry_point_slicing(gs): + points_list = gs[gs.apply(lambda x: isinstance(x, Point))] points = list( chain(points_list.apply(get_coordinates)) - ) # flatten multilinestrings/linestrings - coords_list = list(chain(*points)) # flatten linestrings + ) # flatten multigeometries + coords_list = list(chain(*points)) # flatten geometries xy_interleaved = list(chain(*coords_list)) # flatten coordinates x = xy_interleaved[::2] y = xy_interleaved[1::2] - cugs = cuspatial.from_geopandas(gs) + # slice a superset of point geometries and then extract the points + cugs = cuspatial.from_geopandas(gs)[0:6] assert (cugs.points.x == cudf.Series(x)).all() assert (cugs.points.y == cudf.Series(y)).all() assert (cugs.points.xy == cudf.Series(xy_interleaved)).all() +def test_geometry_multipoint_slicing(gs): + points_list = gs[gs.apply(lambda x: isinstance(x, MultiPoint))] + points = list( + chain(points_list.apply(get_coordinates)) + ) # flatten multigeometries + coords_list = list(chain(*points)) # flatten geometries + xy_interleaved = list(chain(*coords_list)) # flatten coordinates + x = xy_interleaved[::2] + y = xy_interleaved[1::2] + + # slice a superset of multipoint geometries and then extract the multipoints + cugs = cuspatial.from_geopandas(gs)[2:8] + assert (cugs.multipoints.x == cudf.Series(x)).all() + assert (cugs.multipoints.y == cudf.Series(y)).all() + assert (cugs.multipoints.xy == cudf.Series(xy_interleaved)).all() + + def test_geometry_linestring_slicing(gs): lines_list = gs[ gs.apply(lambda x: isinstance(x, (MultiLineString, LineString))) ] lines = list( chain(lines_list.apply(get_coordinates)) - ) # flatten multilinestrings/linestrings - coords_list = list(chain(*lines)) # flatten linestrings + ) # flatten multigeometries + coords_list = list(chain(*lines)) # flatten geometries xy_interleaved = list(chain(*coords_list)) # flatten coordinates x = xy_interleaved[::2] y = xy_interleaved[1::2] - cugs = cuspatial.from_geopandas(gs) + # slice a superset of line geometries and then extract the lines + cugs = cuspatial.from_geopandas(gs)[2:10] assert (cugs.lines.x == cudf.Series(x)).all() assert (cugs.lines.y == cudf.Series(y)).all() assert (cugs.lines.xy == cudf.Series(xy_interleaved)).all() @@ -390,14 +409,14 @@ def test_geometry_polygon_slicing(gs): polys_list = gs[gs.apply(lambda x: isinstance(x, (MultiPolygon, Polygon)))] polys = list( chain(polys_list.apply(get_coordinates)) - ) # flatten multipolygons - coords_list = list(chain(*polys)) # flatten shells and holes + ) # flatten multigeometries + coords_list = list(chain(*polys)) # flatten geometries xy_interleaved = list(chain(*coords_list)) # flatten coordinates x = xy_interleaved[::2] y = xy_interleaved[1::2] - cugs = cuspatial.from_geopandas(gs) - + # slice a superset of polygon geometries and then extract the polygons + cugs = cuspatial.from_geopandas(gs)[6:12] assert (cugs.polygons.x == cudf.Series(x)).all() assert (cugs.polygons.y == cudf.Series(y)).all() assert (cugs.polygons.xy == cudf.Series(xy_interleaved)).all() From c082c4ea3195e4b12048e45a428bd7917261f02e Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 16 Oct 2024 06:20:08 +0000 Subject: [PATCH 09/11] Remove redundant polys fixture --- python/cuspatial/cuspatial/tests/conftest.py | 59 ------------------- .../cuspatial/tests/test_geodataframe.py | 23 ++++++-- .../cuspatial/tests/test_geoseries.py | 19 ++++-- 3 files changed, 32 insertions(+), 69 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/conftest.py b/python/cuspatial/cuspatial/tests/conftest.py index 401bf3110..a8ecb11ec 100644 --- a/python/cuspatial/cuspatial/tests/conftest.py +++ b/python/cuspatial/cuspatial/tests/conftest.py @@ -87,65 +87,6 @@ def gpdf(gs): return result -@pytest.fixture -def polys(): - return np.array( - ( - (35, 36), - (38, 36), - (41, 39), - (41, 42), - (35, 36), - (43, 44), - (48, 44), - (47, 48), - (43, 44), - (45, 45), - (46, 46), - (47, 45), - (45, 45), - (55, 56), - (60, 56), - (59, 60), - (55, 56), - (57, 57), - (58, 58), - (59, 57), - (57, 57), - (67, 68), - (72, 68), - (71, 72), - (67, 68), - (69, 69), - (70, 70), - (71, 69), - (69, 69), - (79, 80), - (90, 82), - (83, 90), - (79, 80), - (80, 81), - (82, 84), - (84, 82), - (80, 81), - (85, 85), - (88, 82), - (86, 82), - (85, 85), - (97, 98), - (99, 101), - (102, 103), - (101, 108), - (97, 98), - (99, 102), - (100, 103), - (101, 103), - (100, 102), - (99, 102), - ) - ) - - @pytest.fixture def gs_sorted(gs): result = pd.concat( diff --git a/python/cuspatial/cuspatial/tests/test_geodataframe.py b/python/cuspatial/cuspatial/tests/test_geodataframe.py index f834b35e5..41247e2e6 100644 --- a/python/cuspatial/cuspatial/tests/test_geodataframe.py +++ b/python/cuspatial/cuspatial/tests/test_geodataframe.py @@ -1,11 +1,13 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. import sys +from itertools import chain import geopandas as gpd import numpy as np import pandas as pd import pytest from geopandas.testing import assert_geodataframe_equal +from shapely import get_coordinates from shapely.affinity import rotate from shapely.geometry import ( LineString, @@ -114,7 +116,7 @@ def test_type_persistence(gpdf): assert type(cugpdf["geometry"]) is cuspatial.GeoSeries -def test_interleaved_point(gpdf, polys): +def test_interleaved_point(gpdf): cugpdf = cuspatial.from_geopandas(gpdf) cugs = cugpdf["geometry"] gs = gpdf["geometry"] @@ -128,7 +130,7 @@ def test_interleaved_point(gpdf, polys): ) -def test_interleaved_multipoint(gpdf, polys): +def test_interleaved_multipoint(gpdf): cugpdf = cuspatial.from_geopandas(gpdf) cugs = cugpdf["geometry"] gs = gpdf["geometry"] @@ -156,7 +158,7 @@ def test_interleaved_multipoint(gpdf, polys): ) -def test_interleaved_lines(gpdf, polys): +def test_interleaved_lines(gpdf): cugpdf = cuspatial.from_geopandas(gpdf) cugs = cugpdf["geometry"] cudf.testing.assert_series_equal( @@ -175,16 +177,25 @@ def test_interleaved_lines(gpdf, polys): ) -def test_interleaved_polygons(gpdf, polys): +def test_interleaved_polygons(gpdf): cugpdf = cuspatial.from_geopandas(gpdf) cugs = cugpdf["geometry"] + gs = gpdf["geometry"] + polys_list = gs[gs.apply(lambda x: isinstance(x, (MultiPolygon, Polygon)))] + # flatten multigeometries + polys = list(chain(polys_list.apply(get_coordinates))) + coords_list = list(chain(*polys)) # flatten geometries + xy_interleaved = list(chain(*coords_list)) # flatten coordinates + x = xy_interleaved[::2] + y = xy_interleaved[1::2] + cudf.testing.assert_series_equal( cudf.Series.from_arrow(cugs.polygons.x.to_arrow()), - cudf.Series(polys[:, 0], dtype="float64"), + cudf.Series(x, dtype="float64"), ) cudf.testing.assert_series_equal( cudf.Series.from_arrow(cugs.polygons.y.to_arrow()), - cudf.Series(polys[:, 1], dtype="float64"), + cudf.Series(y, dtype="float64"), ) diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index c9259ece4..34db706d7 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -135,7 +135,7 @@ def assert_eq_geo(geo1, geo2): assert result.all() -def test_interleaved_point(gs, polys): +def test_interleaved_point(gs): cugs = cuspatial.from_geopandas(gs) pd.testing.assert_series_equal( cugs.points.x.to_pandas(), @@ -183,13 +183,23 @@ def test_interleaved_point(gs, polys): dtype="float64", ).reset_index(drop=True), ) + + polys_list = gs[gs.apply(lambda x: isinstance(x, (MultiPolygon, Polygon)))] + polys = list( + chain(polys_list.apply(get_coordinates)) + ) # flatten multigeometries + coords_list = list(chain(*polys)) # flatten geometries + xy_interleaved = list(chain(*coords_list)) # flatten coordinates + x = xy_interleaved[::2] + y = xy_interleaved[1::2] + cudf.testing.assert_series_equal( cugs.polygons.x.reset_index(drop=True), - cudf.Series(polys[:, 0], dtype="float64").reset_index(drop=True), + cudf.Series(x, dtype="float64").reset_index(drop=True), ) cudf.testing.assert_series_equal( cugs.polygons.y.reset_index(drop=True), - cudf.Series(polys[:, 1], dtype="float64").reset_index(drop=True), + cudf.Series(y, dtype="float64").reset_index(drop=True), ) @@ -379,7 +389,8 @@ def test_geometry_multipoint_slicing(gs): x = xy_interleaved[::2] y = xy_interleaved[1::2] - # slice a superset of multipoint geometries and then extract the multipoints + # slice a superset of multipoint geometries and then + # extract the multipoints cugs = cuspatial.from_geopandas(gs)[2:8] assert (cugs.multipoints.x == cudf.Series(x)).all() assert (cugs.multipoints.y == cudf.Series(y)).all() From 1e961844296db93599561374c8cd27daa0d16fa9 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 17 Oct 2024 01:32:52 +0000 Subject: [PATCH 10/11] Add helper and DRY up tests --- python/cuspatial/cuspatial/testing/helpers.py | 15 +++ .../cuspatial/tests/test_geodataframe.py | 11 +-- .../cuspatial/tests/test_geoseries.py | 93 +++++-------------- 3 files changed, 41 insertions(+), 78 deletions(-) create mode 100644 python/cuspatial/cuspatial/testing/helpers.py diff --git a/python/cuspatial/cuspatial/testing/helpers.py b/python/cuspatial/cuspatial/testing/helpers.py new file mode 100644 index 000000000..a03ab2d57 --- /dev/null +++ b/python/cuspatial/cuspatial/testing/helpers.py @@ -0,0 +1,15 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from itertools import chain + +from shapely import get_coordinates + + +def geometry_to_coords(geom, geom_types): + points_list = geom[geom.apply(lambda x: isinstance(x, geom_types))] + # flatten multigeometries, then geometries, then coordinates + points = list(chain(points_list.apply(get_coordinates))) + coords_list = list(chain(*points)) + xy = list(chain(*coords_list)) + x = xy[::2] + y = xy[1::2] + return xy, x, y diff --git a/python/cuspatial/cuspatial/tests/test_geodataframe.py b/python/cuspatial/cuspatial/tests/test_geodataframe.py index 41247e2e6..af8b7334e 100644 --- a/python/cuspatial/cuspatial/tests/test_geodataframe.py +++ b/python/cuspatial/cuspatial/tests/test_geodataframe.py @@ -1,13 +1,11 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. import sys -from itertools import chain import geopandas as gpd import numpy as np import pandas as pd import pytest from geopandas.testing import assert_geodataframe_equal -from shapely import get_coordinates from shapely.affinity import rotate from shapely.geometry import ( LineString, @@ -21,6 +19,7 @@ import cudf import cuspatial +from cuspatial.testing.helpers import geometry_to_coords np.random.seed(0) @@ -181,13 +180,7 @@ def test_interleaved_polygons(gpdf): cugpdf = cuspatial.from_geopandas(gpdf) cugs = cugpdf["geometry"] gs = gpdf["geometry"] - polys_list = gs[gs.apply(lambda x: isinstance(x, (MultiPolygon, Polygon)))] - # flatten multigeometries - polys = list(chain(polys_list.apply(get_coordinates))) - coords_list = list(chain(*polys)) # flatten geometries - xy_interleaved = list(chain(*coords_list)) # flatten coordinates - x = xy_interleaved[::2] - y = xy_interleaved[1::2] + xy, x, y = geometry_to_coords(gs, (Polygon, MultiPolygon)) cudf.testing.assert_series_equal( cudf.Series.from_arrow(cugs.polygons.x.to_arrow()), diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index 34db706d7..27c8ab4df 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -25,6 +25,7 @@ from cudf.testing import assert_series_equal import cuspatial +from cuspatial.testing.helpers import geometry_to_coords np.random.seed(0) @@ -362,75 +363,29 @@ def test_size(gs, series_slice): assert len(gi) == len(cugs) -def test_geometry_point_slicing(gs): - points_list = gs[gs.apply(lambda x: isinstance(x, Point))] - points = list( - chain(points_list.apply(get_coordinates)) - ) # flatten multigeometries - coords_list = list(chain(*points)) # flatten geometries - xy_interleaved = list(chain(*coords_list)) # flatten coordinates - x = xy_interleaved[::2] - y = xy_interleaved[1::2] - - # slice a superset of point geometries and then extract the points - cugs = cuspatial.from_geopandas(gs)[0:6] - assert (cugs.points.x == cudf.Series(x)).all() - assert (cugs.points.y == cudf.Series(y)).all() - assert (cugs.points.xy == cudf.Series(xy_interleaved)).all() - - -def test_geometry_multipoint_slicing(gs): - points_list = gs[gs.apply(lambda x: isinstance(x, MultiPoint))] - points = list( - chain(points_list.apply(get_coordinates)) - ) # flatten multigeometries - coords_list = list(chain(*points)) # flatten geometries - xy_interleaved = list(chain(*coords_list)) # flatten coordinates - x = xy_interleaved[::2] - y = xy_interleaved[1::2] - - # slice a superset of multipoint geometries and then - # extract the multipoints - cugs = cuspatial.from_geopandas(gs)[2:8] - assert (cugs.multipoints.x == cudf.Series(x)).all() - assert (cugs.multipoints.y == cudf.Series(y)).all() - assert (cugs.multipoints.xy == cudf.Series(xy_interleaved)).all() - - -def test_geometry_linestring_slicing(gs): - lines_list = gs[ - gs.apply(lambda x: isinstance(x, (MultiLineString, LineString))) - ] - lines = list( - chain(lines_list.apply(get_coordinates)) - ) # flatten multigeometries - coords_list = list(chain(*lines)) # flatten geometries - xy_interleaved = list(chain(*coords_list)) # flatten coordinates - x = xy_interleaved[::2] - y = xy_interleaved[1::2] - - # slice a superset of line geometries and then extract the lines - cugs = cuspatial.from_geopandas(gs)[2:10] - assert (cugs.lines.x == cudf.Series(x)).all() - assert (cugs.lines.y == cudf.Series(y)).all() - assert (cugs.lines.xy == cudf.Series(xy_interleaved)).all() - - -def test_geometry_polygon_slicing(gs): - polys_list = gs[gs.apply(lambda x: isinstance(x, (MultiPolygon, Polygon)))] - polys = list( - chain(polys_list.apply(get_coordinates)) - ) # flatten multigeometries - coords_list = list(chain(*polys)) # flatten geometries - xy_interleaved = list(chain(*coords_list)) # flatten coordinates - x = xy_interleaved[::2] - y = xy_interleaved[1::2] - - # slice a superset of polygon geometries and then extract the polygons - cugs = cuspatial.from_geopandas(gs)[6:12] - assert (cugs.polygons.x == cudf.Series(x)).all() - assert (cugs.polygons.y == cudf.Series(y)).all() - assert (cugs.polygons.xy == cudf.Series(xy_interleaved)).all() +@pytest.mark.parametrize( + "geom_access", + [ + # Tuples: accessor, types, slice + # slices here are meant to be supersets of the range in the gs fixture + # that contains the types of geometries being accessed + # Note that cuspatial.GeoSeries provides accessors for "multipoints", + # but not for "multilinestrings" or "multipolygons" + # (inconsistent interface) + ("points", Point, slice(0, 6)), + ("multipoints", MultiPoint, slice(2, 8)), + ("lines", (LineString, MultiLineString), slice(2, 10)), + ("polygons", (Polygon, MultiPolygon), slice(6, 12)), + ], +) +def test_geometry_access_slicing(gs, geom_access): + accessor, types, slice = geom_access + xy, x, y = geometry_to_coords(gs, types) + + cugs = cuspatial.from_geopandas(gs)[slice] + assert (getattr(cugs, accessor).x == cudf.Series(x)).all() + assert (getattr(cugs, accessor).y == cudf.Series(y)).all() + assert (getattr(cugs, accessor).xy == cudf.Series(xy)).all() def test_loc(gs): From ccb9841b2207b9f3e5f4591d281ba78c006d81e5 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 17 Oct 2024 01:44:43 +0000 Subject: [PATCH 11/11] DRY up another test --- python/cuspatial/cuspatial/tests/test_geoseries.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/test_geoseries.py b/python/cuspatial/cuspatial/tests/test_geoseries.py index 27c8ab4df..5304d7ea6 100644 --- a/python/cuspatial/cuspatial/tests/test_geoseries.py +++ b/python/cuspatial/cuspatial/tests/test_geoseries.py @@ -1,7 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. from enum import Enum -from itertools import chain from numbers import Integral import cupy as cp @@ -10,7 +9,6 @@ import pandas as pd import pytest from geopandas.testing import assert_geoseries_equal -from shapely import get_coordinates from shapely.affinity import rotate from shapely.geometry import ( LineString, @@ -185,14 +183,7 @@ def test_interleaved_point(gs): ).reset_index(drop=True), ) - polys_list = gs[gs.apply(lambda x: isinstance(x, (MultiPolygon, Polygon)))] - polys = list( - chain(polys_list.apply(get_coordinates)) - ) # flatten multigeometries - coords_list = list(chain(*polys)) # flatten geometries - xy_interleaved = list(chain(*coords_list)) # flatten coordinates - x = xy_interleaved[::2] - y = xy_interleaved[1::2] + xy, x, y = geometry_to_coords(gs, (MultiPolygon, Polygon)) cudf.testing.assert_series_equal( cugs.polygons.x.reset_index(drop=True),