import datetime from pathlib import Path import shutil from osgeo import gdal import geopandas as gpd import shapely # Prepare test data x, y = (0, 0) geom_array = [] size = 10 while y <= 10000: while x <= 10000: geom = shapely.Polygon( [(x, y), (x, y + size), (x + size, y + size), (x + size, y), (x, y)] ) geom_array.append(geom) x += size x = 0 y += size gdf = gpd.GeoDataFrame(geom_array, columns=["geometry"], crs=31370) # type: ignore print(f"Test dataset with {len(gdf)} squares ready") # Prepare output dir script_path = Path(__file__).resolve() data_dir = script_path.parent / script_path.stem shutil.rmtree(data_dir, ignore_errors=True) data_dir.mkdir(exist_ok=True) # Create the shp test file without spatial index start = datetime.datetime.now() gdf.to_file(data_dir / "test.shp", SPATIAL_INDEX="NO", engine="pyogrio") print(f"writing test.shp without spatial index took {datetime.datetime.now() - start}") # Create spatial index on the shp file start = datetime.datetime.now() datasource = gdal.OpenEx(str(data_dir / "test.shp"), nOpenFlags=gdal.OF_UPDATE) result = datasource.ExecuteSQL(f'CREATE SPATIAL INDEX ON "test"') datasource.ReleaseResultSet(result) print(f"create spatial index on test.shp took {datetime.datetime.now() - start}") # Create the gpkg test file without spatial index start = datetime.datetime.now() gdf.to_file(data_dir / "test.gpkg", SPATIAL_INDEX="NO", engine="pyogrio") print(f"writing test.gpkg without spatial index took {datetime.datetime.now() - start}") # Create the gpkg test file with spatial index start = datetime.datetime.now() gdf.to_file(data_dir / "test_si.gpkg", engine="pyogrio") print(f"writing test_si.gpkg with spatial index took {datetime.datetime.now() - start}") # Create spatial index on the gpkg file, with default sqlite cache start = datetime.datetime.now() gdal.SetConfigOption("OGR_SQLITE_CACHE", None) datasource = gdal.OpenEx(str(data_dir / "test.gpkg"), nOpenFlags=gdal.OF_UPDATE) sql = "SELECT CreateSpatialIndex('test', 'geom')" result = datasource.ExecuteSQL(sql) datasource.ReleaseResultSet(result) took = datetime.datetime.now() - start print(f"create spatial index on test.gpkg with default sqlite cache took {took}") # Recreate spatial index on the gpkg file, with 512 MB sqlite cache result = datasource.ExecuteSQL("SELECT DisableSpatialIndex('test', 'geom')") datasource.ReleaseResultSet(result) start = datetime.datetime.now() gdal.SetConfigOption("OGR_SQLITE_CACHE", "512") sql = "SELECT CreateSpatialIndex('test', 'geom')" result = datasource.ExecuteSQL(sql) datasource.ReleaseResultSet(result) took = datetime.datetime.now() - start print(f"create spatial index on test.gpkg with 512 MB sqlite cache took {took}")