Skip to content

Commit

Permalink
GPKG: use much faster creation of RTree with in-memory RTree building
Browse files Browse the repository at this point in the history
```
$ time ogr2ogr tmp.gpkg nz-building-outlines.gpkg
real    0m16,433s
user    0m18,093s
sys     0m2,135s
```

vs. without the optimization, with SQLite 3.43.2 and with RTree forced reinsertion disabled
(from SQLite master sqlite/sqlite@7de8ae2).
Otherwise it takes 38 seconds.

```
$ time OGR_GPKG_MAX_RAM_USAGE_RTREE=0 ogr2ogr tmp.gpkg nz-building-outlines.gpkg
real    0m28,000s
user    0m40,287s
sys     0m5,244s
```
  • Loading branch information
rouault committed Oct 22, 2023
1 parent 182b844 commit bdceec9
Show file tree
Hide file tree
Showing 11 changed files with 1,409 additions and 200 deletions.
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,6 @@ repos:
ogr/ogrsf_frmts/cad/libopencad/|
ogr/ogrsf_frmts/geojson/libjson/|
ogr/ogrsf_frmts/flatgeobuf/flatbuffers/|
ogr/ogrsf_frmts/pmtiles/pmtiles/
ogr/ogrsf_frmts/pmtiles/pmtiles/|
ogr/ogrsf_frmts/sqlite/sqlite_rtree_bulk_load
)
75 changes: 47 additions & 28 deletions autotest/ogr/ogr_gpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -8389,42 +8389,53 @@ def test_ogr_gpkg_get_geometry_types(tmp_vsimem):


@pytest.mark.parametrize("write_to_disk", (True, False), ids=["on_disk", "in_memory"])
def test_ogr_gpkg_background_rtree_build(tmp_path, tmp_vsimem, write_to_disk):
@pytest.mark.parametrize("OGR_GPKG_MAX_RAM_USAGE_RTREE", (1, 1000, None))
def test_ogr_gpkg_background_rtree_build(
tmp_path, tmp_vsimem, write_to_disk, OGR_GPKG_MAX_RAM_USAGE_RTREE
):

if write_to_disk:
filename = tmp_path / "test_ogr_gpkg_background_rtree_build.gpkg"
else:
filename = tmp_vsimem / "test_ogr_gpkg_background_rtree_build.gpkg"

# Batch insertion only

gdal.ErrorReset()
ds = gdaltest.gpkg_dr.CreateDataSource(filename)
with gdaltest.config_option("OGR_GPKG_THREADED_RTREE_AT_FIRST_FEATURE", "YES"):
lyr = ds.CreateLayer("foo")
assert lyr.StartTransaction() == ogr.OGRERR_NONE
for i in range(1000):
f = ogr.Feature(lyr.GetLayerDefn())
f.SetGeometryDirectly(ogr.CreateGeometryFromWkt("POINT(%d %d)" % (i, i)))
assert lyr.CreateFeature(f) == ogr.OGRERR_NONE
if i == 500:
assert lyr.CommitTransaction() == ogr.OGRERR_NONE
assert lyr.StartTransaction() == ogr.OGRERR_NONE
assert lyr.CommitTransaction() == ogr.OGRERR_NONE
assert gdal.GetLastErrorMsg() == ""

with gdaltest.config_option("OGR_GPKG_THREADED_RTREE_AT_FIRST_FEATURE", "YES"):
lyr = ds.CreateLayer("bar")
assert lyr.StartTransaction() == ogr.OGRERR_NONE
for i in range(900):
f = ogr.Feature(lyr.GetLayerDefn())
f.SetGeometryDirectly(ogr.CreateGeometryFromWkt("POINT(%d %d)" % (-i, -i)))
assert lyr.CreateFeature(f) == ogr.OGRERR_NONE
if i == 500:
assert lyr.CommitTransaction() == ogr.OGRERR_NONE
assert lyr.StartTransaction() == ogr.OGRERR_NONE
assert lyr.CommitTransaction() == ogr.OGRERR_NONE
assert gdal.GetLastErrorMsg() == ""
with gdaltest.config_option(
"OGR_GPKG_MAX_RAM_USAGE_RTREE",
str(OGR_GPKG_MAX_RAM_USAGE_RTREE)
if OGR_GPKG_MAX_RAM_USAGE_RTREE is not None
else None,
thread_local=False,
):
ds = gdaltest.gpkg_dr.CreateDataSource(filename)
with gdaltest.config_option("OGR_GPKG_THREADED_RTREE_AT_FIRST_FEATURE", "YES"):
lyr = ds.CreateLayer("foo")
assert lyr.StartTransaction() == ogr.OGRERR_NONE
for i in range(1000):
f = ogr.Feature(lyr.GetLayerDefn())
f.SetGeometryDirectly(
ogr.CreateGeometryFromWkt("POINT(%d %d)" % (10000 + i, i))
)
assert lyr.CreateFeature(f) == ogr.OGRERR_NONE
if i == 500:
assert lyr.CommitTransaction() == ogr.OGRERR_NONE
assert lyr.StartTransaction() == ogr.OGRERR_NONE
assert lyr.CommitTransaction() == ogr.OGRERR_NONE
assert gdal.GetLastErrorMsg() == ""

with gdaltest.config_option("OGR_GPKG_THREADED_RTREE_AT_FIRST_FEATURE", "YES"):
lyr = ds.CreateLayer("bar")
assert lyr.StartTransaction() == ogr.OGRERR_NONE
for i in range(900):
f = ogr.Feature(lyr.GetLayerDefn())
f.SetGeometryDirectly(ogr.CreateGeometryFromWkt("POINT(%d %d)" % (-i, -i)))
assert lyr.CreateFeature(f) == ogr.OGRERR_NONE
if i == 500:
assert lyr.CommitTransaction() == ogr.OGRERR_NONE
assert lyr.StartTransaction() == ogr.OGRERR_NONE
assert lyr.CommitTransaction() == ogr.OGRERR_NONE
assert gdal.GetLastErrorMsg() == ""

ds = None
assert gdal.VSIStatL(filename.with_suffix(".gpkg.tmp_rtree_foo.db")) is None
Expand All @@ -8434,9 +8445,17 @@ def test_ogr_gpkg_background_rtree_build(tmp_path, tmp_vsimem, write_to_disk):
sql_lyr = ds.ExecuteSQL("SELECT * FROM rtree_foo_geom")
assert sql_lyr.GetFeatureCount() == 1000
ds.ReleaseResultSet(sql_lyr)
foo_lyr = ds.GetLayerByName("foo")
for i in range(1000):
foo_lyr.SetSpatialFilterRect(10000 + i - 0.5, i - 0.5, 10000 + i + 0.5, i + 0.5)
assert foo_lyr.GetFeatureCount() == 1, i
sql_lyr = ds.ExecuteSQL("SELECT * FROM rtree_bar_geom")
assert sql_lyr.GetFeatureCount() == 900
ds.ReleaseResultSet(sql_lyr)
bar_lyr = ds.GetLayerByName("bar")
for i in range(900):
bar_lyr.SetSpatialFilterRect(-i - 0.5, -i - 0.5, -i + 0.5, -i + 0.5)
assert bar_lyr.GetFeatureCount() == 1, i
ds = None

gdal.Unlink(filename)
Expand Down
3 changes: 3 additions & 0 deletions ogr/ogrsf_frmts/gpkg/ogr_geopackage.h
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,7 @@ class OGRGeoPackageLayer CPL_NON_FINAL : public OGRLayer,
/************************************************************************/

struct OGRGPKGTableLayerFillArrowArray;
struct sqlite_rtree_bl;

class OGRGeoPackageTableLayer final : public OGRGeoPackageLayer
{
Expand Down Expand Up @@ -674,6 +675,7 @@ class OGRGeoPackageTableLayer final : public OGRGeoPackageLayer
std::string m_osAsyncDBName{};
std::string m_osAsyncDBAttachName{};
sqlite3 *m_hAsyncDBHandle = nullptr;
sqlite_rtree_bl *m_hRTree = nullptr;
cpl::ThreadSafeQueue<std::vector<GPKGRTreeEntry>> m_oQueueRTreeEntries{};
bool m_bAllowedRTreeThread = false;
bool m_bThreadRTreeStarted = false;
Expand Down Expand Up @@ -854,6 +856,7 @@ class OGRGeoPackageTableLayer final : public OGRGeoPackageLayer

void CreateSpatialIndexIfNecessary();
void FinishOrDisableThreadedRTree();
bool FlushInMemoryRTree(sqlite3 *hRTreeDB, const char *pszRTreeName);
bool CreateSpatialIndex(const char *pszTableName = nullptr);
bool DropSpatialIndex(bool bCalledFromSQLFunction = false);
CPLString ReturnSQLCreateSpatialIndexTriggers(const char *pszTableName,
Expand Down
Loading

0 comments on commit bdceec9

Please sign in to comment.