From bb9f3ab6f92942b4a683f5bcb7a88626c0cf6952 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 20 Apr 2023 02:08:03 +0200 Subject: [PATCH] GPKG: add ST_EnvIntersects() for faster spatial filtering when there is no spatial index Execution time can be ~ 1.5x faster. Before: ``` $ time ogrinfo nz-building-outlines.gpkg -sql "select count(*) from \"nz-building-outlines\" where st_maxx(geometry)>=1500000 and st_maxy(geometry)>= 5200000 and st_minx(geometry) <=1800000 and st_miny(geometry) <= 5800000" -al -q Layer name: SELECT OGRFeature(SELECT):0 count(*) (Integer) = 701300 real 0m0,909s user 0m0,633s sys 0m0,276s ``` After: ``` $ time ogrinfo nz-building-outlines.gpkg -sql "select count(*) from \"nz-building-outlines\" where ST_EnvelopesIntersects(geometry, 1500000, 5200000 ,1800000 ,5800000)" -al -q Layer name: SELECT OGRFeature(SELECT):0 count(*) (Integer) = 701300 real 0m0,605s user 0m0,310s sys 0m0,290s ``` --- autotest/ogr/ogr_gpkg.py | 73 ++++++++++++++ doc/source/drivers/vector/gpkg.rst | 7 ++ .../gpkg/ogrgeopackagedatasource.cpp | 95 ++++++++++++++++++- .../gpkg/ogrgeopackagetablelayer.cpp | 8 +- 4 files changed, 174 insertions(+), 9 deletions(-) diff --git a/autotest/ogr/ogr_gpkg.py b/autotest/ogr/ogr_gpkg.py index 2714008100d6..176e36c54254 100755 --- a/autotest/ogr/ogr_gpkg.py +++ b/autotest/ogr/ogr_gpkg.py @@ -1316,6 +1316,79 @@ def test_ogr_gpkg_SetSRID(): gdal.Unlink("/vsimem/test_ogr_gpkg_SetSRID.gpkg") +############################################################################### +# Test ST_EnvIntersects() function + + +def test_ogr_gpkg_ST_EnvIntersects(): + + filename = "/vsimem/test_ogr_gpkg_ST_EnvIntersects.gpkg" + ds = ogr.GetDriverByName("GPKG").CreateDataSource(filename) + lyr = ds.CreateLayer("foo") + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("LINESTRING(1 2,3 4)")) + lyr.CreateFeature(f) + + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetGeometry(ogr.CreateGeometryFromWkt("LINESTRING(5 6,7 8)")) + lyr.CreateFeature(f) + + sql_lyr = ds.ExecuteSQL( + "SELECT ST_EnvIntersects(geom, 0, 0, 0.99, 100)," + + " ST_EnvIntersects(geom, 0, 4.01, 100, 100)," + + " ST_EnvIntersects(geom, 3.01, 0, 100, 100)," + + " ST_EnvIntersects(geom, 0, 0, 100, 1.99)," + + " ST_EnvIntersects(geom, 0.99, 1.99, 1.01, 2.01)," + + " ST_EnvIntersects(geom, 0.99, 3.99, 1.01, 4.01)," + + " ST_EnvIntersects(geom, 2.99, 3.99, 3.01, 4.01)," + + " ST_EnvIntersects(geom, 2.99, 1.99, 3.01, 2.01)" + + " FROM foo WHERE fid = 1" + ) + f = sql_lyr.GetNextFeature() + try: + assert f.GetField(0) == 0 + assert f.GetField(1) == 0 + assert f.GetField(2) == 0 + assert f.GetField(3) == 0 + assert f.GetField(4) == 1 + assert f.GetField(5) == 1 + assert f.GetField(6) == 1 + assert f.GetField(7) == 1 + finally: + ds.ReleaseResultSet(sql_lyr) + + sql_lyr = ds.ExecuteSQL( + "SELECT ST_EnvIntersects(a.geom, b.geom) FROM foo a, foo b WHERE a.fid = 1 AND b.fid = 1" + ) + f = sql_lyr.GetNextFeature() + try: + assert f.GetField(0) == 1 + finally: + ds.ReleaseResultSet(sql_lyr) + + sql_lyr = ds.ExecuteSQL( + "SELECT ST_EnvIntersects(a.geom, b.geom) FROM foo a, foo b WHERE a.fid = 1 AND b.fid = 2" + ) + f = sql_lyr.GetNextFeature() + try: + assert f.GetField(0) == 0 + finally: + ds.ReleaseResultSet(sql_lyr) + + sql_lyr = ds.ExecuteSQL( + "SELECT ST_EnvIntersects(a.geom, b.geom) FROM foo a, foo b WHERE a.fid = 2 AND b.fid = 1" + ) + f = sql_lyr.GetNextFeature() + try: + assert f.GetField(0) == 0 + finally: + ds.ReleaseResultSet(sql_lyr) + + ds = None + gdal.Unlink("/vsimem/test_ogr_gpkg_ST_EnvIntersects.gpkg") + + ############################################################################### # Test unknown extensions diff --git a/doc/source/drivers/vector/gpkg.rst b/doc/source/drivers/vector/gpkg.rst index 361fedd5a2d0..775fd6af7478 100644 --- a/doc/source/drivers/vector/gpkg.rst +++ b/doc/source/drivers/vector/gpkg.rst @@ -123,6 +123,13 @@ Spatialite, are also available : to the SRS of specified srs_id. If no SRS with that given srs_id is not found in gpkg_spatial_ref_sys, starting with GDAL 3.2, it will be interpreted as a EPSG code. +- ST_EnvIntersects(geom *Geometry*, minx *Double*, miny *Double*, maxx *Double*, maxy *Double*): + (GDAL >= 3.7) Returns 1 if the minimum bounding box of geom intersects the + bounding box defined by (minx,miny)-(maxx,maxy), or 0 otherwise. +- ST_EnvIntersects(geom1 *Geometry*, geom2 *Geometry*): + (GDAL >= 3.7) Returns 1 if the minimum bounding box of geom1 intersects the + one of geom2, or 0 otherwise. (Note: this function, as all others, does not + automatically uses spatial indices) The raster SQL functions mentioned at :ref:`raster.gpkg.raster` are also available. diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp index 6781e01f4539..aec5d2c1031f 100644 --- a/ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp @@ -7524,16 +7524,16 @@ OGRErr GDALGeoPackageDataset::CreateExtensionsTableIfNecessary() static bool OGRGeoPackageGetHeader(sqlite3_context *pContext, int /*argc*/, sqlite3_value **argv, GPkgHeader *psHeader, - bool bNeedExtent) + bool bNeedExtent, int iGeomIdx = 0) { - if (sqlite3_value_type(argv[0]) != SQLITE_BLOB) + if (sqlite3_value_type(argv[iGeomIdx]) != SQLITE_BLOB) { sqlite3_result_null(pContext); return false; } - int nBLOBLen = sqlite3_value_bytes(argv[0]); + int nBLOBLen = sqlite3_value_bytes(argv[iGeomIdx]); const GByte *pabyBLOB = - reinterpret_cast(sqlite3_value_blob(argv[0])); + reinterpret_cast(sqlite3_value_blob(argv[iGeomIdx])); if (nBLOBLen < 8 || GPkgHeaderFromWKB(pabyBLOB, nBLOBLen, psHeader) != OGRERR_NONE) @@ -7691,6 +7691,79 @@ static void OGRGeoPackageSTGeometryType(sqlite3_context *pContext, int /*argc*/, SQLITE_TRANSIENT); } +/************************************************************************/ +/* OGRGeoPackageSTEnvelopesIntersects() */ +/************************************************************************/ + +static void OGRGeoPackageSTEnvelopesIntersects(sqlite3_context *pContext, + int argc, sqlite3_value **argv) +{ + GPkgHeader sHeader; + if (!OGRGeoPackageGetHeader(pContext, argc, argv, &sHeader, true)) + { + sqlite3_result_int(pContext, FALSE); + return; + } + const double dfMinX = sqlite3_value_double(argv[1]); + if (sHeader.MaxX < dfMinX) + { + sqlite3_result_int(pContext, FALSE); + return; + } + const double dfMinY = sqlite3_value_double(argv[2]); + if (sHeader.MaxY < dfMinY) + { + sqlite3_result_int(pContext, FALSE); + return; + } + const double dfMaxX = sqlite3_value_double(argv[3]); + if (sHeader.MinX > dfMaxX) + { + sqlite3_result_int(pContext, FALSE); + return; + } + const double dfMaxY = sqlite3_value_double(argv[4]); + sqlite3_result_int(pContext, sHeader.MinY <= dfMaxY); +} + +/************************************************************************/ +/* OGRGeoPackageSTEnvelopesIntersectsTwoParams() */ +/************************************************************************/ + +static void +OGRGeoPackageSTEnvelopesIntersectsTwoParams(sqlite3_context *pContext, int argc, + sqlite3_value **argv) +{ + GPkgHeader sHeader; + if (!OGRGeoPackageGetHeader(pContext, argc, argv, &sHeader, true, 0)) + { + sqlite3_result_int(pContext, FALSE); + return; + } + GPkgHeader sHeader2; + if (!OGRGeoPackageGetHeader(pContext, argc, argv, &sHeader2, true, 1)) + { + sqlite3_result_int(pContext, FALSE); + return; + } + if (sHeader.MaxX < sHeader2.MinX) + { + sqlite3_result_int(pContext, FALSE); + return; + } + if (sHeader.MaxY < sHeader2.MinY) + { + sqlite3_result_int(pContext, FALSE); + return; + } + if (sHeader.MinX > sHeader2.MaxX) + { + sqlite3_result_int(pContext, FALSE); + return; + } + sqlite3_result_int(pContext, sHeader.MinY <= sHeader2.MaxY); +} + /************************************************************************/ /* OGR_GPKG_GeometryTypeAggregate() */ /************************************************************************/ @@ -8793,6 +8866,20 @@ void GDALGeoPackageDataset::InstallSQLFunctions() sqlite3_create_function(hDB, "SridFromAuthCRS", 2, SQLITE_UTF8, this, OGRGeoPackageSridFromAuthCRS, nullptr, nullptr); + sqlite3_create_function( + hDB, "ST_EnvIntersects", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, nullptr, + OGRGeoPackageSTEnvelopesIntersectsTwoParams, nullptr, nullptr); + sqlite3_create_function( + hDB, "ST_EnvelopesIntersects", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, + nullptr, OGRGeoPackageSTEnvelopesIntersectsTwoParams, nullptr, nullptr); + + sqlite3_create_function( + hDB, "ST_EnvIntersects", 5, SQLITE_UTF8 | SQLITE_DETERMINISTIC, nullptr, + OGRGeoPackageSTEnvelopesIntersects, nullptr, nullptr); + sqlite3_create_function( + hDB, "ST_EnvelopesIntersects", 5, SQLITE_UTF8 | SQLITE_DETERMINISTIC, + nullptr, OGRGeoPackageSTEnvelopesIntersects, nullptr, nullptr); + // Implementation that directly hacks the GeoPackage geometry blob header sqlite3_create_function(hDB, "SetSRID", 2, SQLITE_UTF8 | SQLITE_DETERMINISTIC, nullptr, diff --git a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp index cf5c993ecf8e..98552478eeef 100644 --- a/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp +++ b/ogr/ogrsf_frmts/gpkg/ogrgeopackagetablelayer.cpp @@ -5117,12 +5117,10 @@ CPLString OGRGeoPackageTableLayer::GetSpatialWhere(int iGeomColIn, /* A bit inefficient but still faster than OGR filtering */ osSpatialWHERE.Printf( - "(ST_MaxX(\"%s\") >= %.12f AND ST_MinX(\"%s\") <= %.12f AND " - "ST_MaxY(\"%s\") >= %.12f AND ST_MinY(\"%s\") <= %.12f)", + "ST_EnvelopesIntersects(\"%s\", %.12f, %.12f, %.12f, %.12f)", SQLEscapeName(pszC).c_str(), sEnvelope.MinX - 1e-11, - SQLEscapeName(pszC).c_str(), sEnvelope.MaxX + 1e-11, - SQLEscapeName(pszC).c_str(), sEnvelope.MinY - 1e-11, - SQLEscapeName(pszC).c_str(), sEnvelope.MaxY + 1e-11); + sEnvelope.MinY - 1e-11, sEnvelope.MaxX + 1e-11, + sEnvelope.MaxY + 1e-11); } }