Skip to content

Commit

Permalink
OGR SQLite/SQLite/GPKG: add UTF-8 support for case-insensitive LIKE (…
Browse files Browse the repository at this point in the history
…fixes OSGeo#8835)
  • Loading branch information
rouault committed Nov 27, 2023
1 parent f8b889c commit d6843f9
Show file tree
Hide file tree
Showing 13 changed files with 319 additions and 3 deletions.
51 changes: 51 additions & 0 deletions autotest/ogr/ogr_gpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9739,3 +9739,54 @@ def test_ogr_gpkg_arrow_stream_huge_array(tmp_vsimem, too_big_field):
assert got_fids == [i + 1 for i in range(50)]
assert batch_count == (25 if too_big_field == "geometry" else 21), lyr_name
del stream


###############################################################################
# Test our overloaded LIKE operator


@gdaltest.enable_exceptions()
def test_ogr_gpkg_like_utf8(tmp_vsimem):

filename = str(tmp_vsimem / "test_ogr_gpkg_like_utf8.gpkg")
ds = ogr.GetDriverByName("GPKG").CreateDataSource(filename)
lyr = ds.CreateLayer("test")
lyr.CreateFeature(ogr.Feature(lyr.GetLayerDefn()))

with ds.ExecuteSQL("SELECT * FROM test WHERE 'e' LIKE 'E'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL("SELECT * FROM test WHERE 'e' LIKE 'i'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL("SELECT * FROM test WHERE 'é' LIKE 'É'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'éx' LIKE 'Éxx' ESCAPE 'x'"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL("SELECT * FROM test WHERE NULL LIKE 'É'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL("SELECT * FROM test WHERE 'é' LIKE NULL") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL("SELECT * FROM test WHERE 'é' LIKE 'É' ESCAPE NULL") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'é' LIKE 'É' ESCAPE 'should be single char'"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

ds.ExecuteSQL("PRAGMA case_sensitive_like = 1")

with ds.ExecuteSQL("SELECT * FROM test WHERE 'e' LIKE 'E'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

ds.ExecuteSQL("PRAGMA case_sensitive_like = 0")

with ds.ExecuteSQL("SELECT * FROM test WHERE 'e' LIKE 'E'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1
53 changes: 53 additions & 0 deletions autotest/ogr/ogr_sql_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2426,3 +2426,56 @@ def test_ogr_sql_sqlite_unsupported(sql):
lyr.CreateField(ogr.FieldDefn("foo"))
with pytest.raises(Exception):
ds.ExecuteSQL(sql, dialect="SQLite")


###############################################################################
# Test our overloaded LIKE operator


@gdaltest.enable_exceptions()
def test_ogr_sql_sqlite_like_utf8():

ds = ogr.GetDriverByName("Memory").CreateDataSource("")
lyr = ds.CreateLayer("test", options=["ADVERTIZE_UTF8=YES"])
lyr.CreateFeature(ogr.Feature(lyr.GetLayerDefn()))

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'e' LIKE 'E'", dialect="SQLite"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'e' LIKE 'i'", dialect="SQLite"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'é' LIKE 'É'", dialect="SQLite"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'éx' LIKE 'Éxx' ESCAPE 'x'", dialect="SQLite"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL(
"SELECT * FROM test WHERE NULL LIKE 'É'", dialect="SQLite"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'é' LIKE NULL", dialect="SQLite"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'é' LIKE 'É' ESCAPE NULL", dialect="SQLite"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'é' LIKE 'É' ESCAPE 'should be single char'",
dialect="SQLite",
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0
50 changes: 50 additions & 0 deletions autotest/ogr/ogr_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -3991,3 +3991,53 @@ def test_ogr_sql_sql_first_geom_null(require_spatialite):
assert sql_lyr.GetGeometryColumn() == "ST_Buffer(geom,0.1)"
with ds.ExecuteSQL("SELECT ST_Buffer(geom,0.1), * FROM test") as sql_lyr:
assert sql_lyr.GetGeometryColumn() == "ST_Buffer(geom,0.1)"


###############################################################################
# Test our overloaded LIKE operator


@gdaltest.enable_exceptions()
def test_ogr_sqlite_like_utf8():

ds = ogr.GetDriverByName("SQLite").CreateDataSource(":memory:")
lyr = ds.CreateLayer("test")
lyr.CreateFeature(ogr.Feature(lyr.GetLayerDefn()))

with ds.ExecuteSQL("SELECT * FROM test WHERE 'e' LIKE 'E'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL("SELECT * FROM test WHERE 'e' LIKE 'i'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL("SELECT * FROM test WHERE 'é' LIKE 'É'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'éx' LIKE 'Éxx' ESCAPE 'x'"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1

with ds.ExecuteSQL("SELECT * FROM test WHERE NULL LIKE 'É'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL("SELECT * FROM test WHERE 'é' LIKE NULL") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL("SELECT * FROM test WHERE 'é' LIKE 'É' ESCAPE NULL") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

with ds.ExecuteSQL(
"SELECT * FROM test WHERE 'é' LIKE 'É' ESCAPE 'should be single char'"
) as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

ds.ExecuteSQL("PRAGMA case_sensitive_like = 1")

with ds.ExecuteSQL("SELECT * FROM test WHERE 'e' LIKE 'E'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 0

ds.ExecuteSQL("PRAGMA case_sensitive_like = 0")

with ds.ExecuteSQL("SELECT * FROM test WHERE 'e' LIKE 'E'") as sql_lyr:
assert sql_lyr.GetFeatureCount() == 1
16 changes: 16 additions & 0 deletions doc/source/user/sql_sqlite_dialect.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,22 @@ translated, as far as possible, as attribute filters that are applied on the
underlying OGR layers. Joins can be very expensive operations if the secondary table is not
indexed on the key field being used.

LIKE operator
+++++++++++++

In SQLite, the LIKE operator is case insensitive, unless ``PRAGMA case_sensitive_like = 1``
has been issued.

Starting with GDAL 3.9, GDAL installs a custom LIKE comparison, such that UTF-8
characters are taken into account by ``LIKE`` and ``ILIKE`` operators.
For ILIKE case insensitive comparisons, this is restricted to the
`ASCII <https://en.wikipedia.org/wiki/Basic_Latin_(Unicode_block)>`__,
`Latin-1 Supplement <https://en.wikipedia.org/wiki/Latin-1_Supplement_(Unicode_block)>`__,
`Latin Extended-A <https://en.wikipedia.org/wiki/Latin_Extended-A>`__,
`Latin Extended-B <https://en.wikipedia.org/wiki/Latin_Extended-B>`__,
`Greek and Coptic <https://en.wikipedia.org/wiki/Greek_and_Coptic>`__
and `Cyrillic <https://en.wikipedia.org/wiki/Greek_and_Coptic>`__ Unicode categories.

Delimited identifiers
+++++++++++++++++++++

Expand Down
2 changes: 2 additions & 0 deletions ogr/ogr_swq.h
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,8 @@ char CPL_UNSTABLE_API *OGRHStoreGetValue(const char *pszHStore,
void swq_fixup(swq_parse_context *psParseContext);
swq_expr_node *swq_create_and_or_or(swq_op op, swq_expr_node *left,
swq_expr_node *right);
int swq_test_like(const char *input, const char *pattern, char chEscape,
bool insensitive, bool bUTF8Strings);
#endif

#endif /* #ifndef DOXYGEN_SKIP */
Expand Down
18 changes: 18 additions & 0 deletions ogr/ogrsf_frmts/gpkg/ogrgeopackagedatasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include <sstream>

#define COMPILATION_ALLOWED
#define DEFINE_OGRSQLiteSQLFunctionsSetCaseSensitiveLike
#include "ogrsqlitesqlfunctionscommon.cpp"

// Keep in sync prototype of those 2 functions between gdalopeninfo.cpp,
Expand Down Expand Up @@ -7146,6 +7147,21 @@ OGRLayer *GDALGeoPackageDataset::ExecuteSQL(const char *pszSQLCommand,
}
}

if (EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like = 0") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like=0") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like =0") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like= 0"))
{
OGRSQLiteSQLFunctionsSetCaseSensitiveLike(m_pSQLFunctionData, false);
}
else if (EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like = 1") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like=1") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like =1") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like= 1"))
{
OGRSQLiteSQLFunctionsSetCaseSensitiveLike(m_pSQLFunctionData, true);
}

/* -------------------------------------------------------------------- */
/* DEBUG "SELECT nolock" command. */
/* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -8910,7 +8926,9 @@ static void GPKG_ogr_layer_Extent(sqlite3_context *pContext, int /*argc*/,
#define SQLITE_INNOCUOUS 0
#endif

#ifndef UTF8_INNOCUOUS
#define UTF8_INNOCUOUS (SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS)
#endif

void GDALGeoPackageDataset::InstallSQLFunctions()
{
Expand Down
4 changes: 4 additions & 0 deletions ogr/ogrsf_frmts/sqlite/ogr_sqlite.h
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,8 @@ class OGRSQLiteSelectLayer CPL_NON_FINAL : public OGRSQLiteLayer,
/* OGRSQLiteDataSource */
/************************************************************************/

class OGR2SQLITEModule;

class OGRSQLiteDataSource final : public OGRSQLiteBaseDataSource
{
OGRSQLiteLayer **m_papoLayers = nullptr;
Expand Down Expand Up @@ -688,6 +690,8 @@ class OGRSQLiteDataSource final : public OGRSQLiteBaseDataSource
OGRSQLiteDataSource *m_poParentDS = nullptr;
std::vector<OGRSQLiteDataSource *> m_apoOverviewDS{};

OGR2SQLITEModule *m_poSQLiteModule = nullptr;

#ifdef HAVE_RASTERLITE2
void ListOverviews();
void CreateRL2OverviewDatasetIfNeeded(double dfXRes, double dfYRes);
Expand Down
19 changes: 18 additions & 1 deletion ogr/ogrsf_frmts/sqlite/ogrsqlitedatasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1673,7 +1673,7 @@ bool OGRSQLiteDataSource::OpenOrCreateDB(int flagsIn,
// that will do it with other datasets.
CPLTestBool(CPLGetConfigOption("OGR_SQLITE_STATIC_VIRTUAL_OGR", "YES")))
{
OGR2SQLITE_Setup(this, this);
m_poSQLiteModule = OGR2SQLITE_Setup(this, this);
}
// We need to do LoadExtensions() after OGR2SQLITE_Setup(), otherwise
// tests in ogr_virtualogr.py::test_ogr_sqlite_load_extensions_load_self()
Expand Down Expand Up @@ -3213,6 +3213,23 @@ OGRLayer *OGRSQLiteDataSource::ExecuteSQL(const char *pszSQLCommand,
return GDALDataset::ExecuteSQL(pszSQLCommand, poSpatialFilter,
"SQLITE");

if (EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like = 0") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like=0") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like =0") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like= 0"))
{
if (m_poSQLiteModule)
OGR2SQLITE_SetCaseSensitiveLike(m_poSQLiteModule, false);
}
else if (EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like = 1") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like=1") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like =1") ||
EQUAL(pszSQLCommand, "PRAGMA case_sensitive_like= 1"))
{
if (m_poSQLiteModule)
OGR2SQLITE_SetCaseSensitiveLike(m_poSQLiteModule, true);
}

/* -------------------------------------------------------------------- */
/* Special case DELLAYER: command. */
/* -------------------------------------------------------------------- */
Expand Down
2 changes: 2 additions & 0 deletions ogr/ogrsf_frmts/sqlite/ogrsqlitesqlfunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,9 @@ static void OGRSQLITE_hstore_get_value(sqlite3_context *pContext,
#define SQLITE_INNOCUOUS 0
#endif

#ifndef UTF8_INNOCUOUS
#define UTF8_INNOCUOUS (SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS)
#endif

static void *OGRSQLiteRegisterSQLFunctions(sqlite3 *hDB)
{
Expand Down
Loading

0 comments on commit d6843f9

Please sign in to comment.