Skip to content

Commit

Permalink
OGR SQL: add UTF-8 support for LIKE/ILIKE (for layers declaring OLCStringsAsUTF8) (fixes OSGeo#8835)
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Nov 27, 2023
1 parent aceb41e commit ed51608
Show file tree
Hide file tree
Showing 10 changed files with 1,922 additions and 33 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ repos:
frmts/pcidsk/sdk|
frmts/grib/degrib/degrib|
frmts/grib/degrib/g2clib|
ogr/utf8.h|
ogr/ogrsf_frmts/cad/libopencad/|
ogr/ogrsf_frmts/geojson/libjson/|
ogr/ogrsf_frmts/flatgeobuf/flatbuffers/|
Expand Down
97 changes: 97 additions & 0 deletions autotest/ogr/ogr_sql_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1720,3 +1720,100 @@ def test_ogr_sql_select_except_join_3(select_except_join_ds):
assert defn.GetFieldCount() == 2
assert defn.GetFieldDefn(0).GetName() == "id"
assert defn.GetFieldDefn(1).GetName() == "name"


def test_ogr_sql_like_utf8():
    """Exercise LIKE with non-ASCII literals on a layer created with
    ADVERTIZE_UTF8=YES.

    The layer holds a single attribute-less feature and every filter compares
    two constants, so the feature count is 1 when the expression is true and
    0 when it is false.
    """

    mem_ds = ogr.GetDriverByName("Memory").CreateDataSource("")
    layer = mem_ds.CreateLayer("test", options=["ADVERTIZE_UTF8=YES"])
    layer.CreateFeature(ogr.Feature(layer.GetLayerDefn()))

    def check(where, expected_count):
        # Install the filter, then compare the number of selected features.
        layer.SetAttributeFilter(where)
        assert layer.GetFeatureCount() == expected_count

    # Letters differing only by case are expected not to match with LIKE
    check("'é' LIKE 'É'", 0)
    check("'É' LIKE 'é'", 0)
    check("'É' LIKE 'É'", 1)

    # An accented letter matches neither its unaccented form, nor another
    # accent, nor the empty pattern
    check("'é' LIKE 'e'", 0)
    check("'é' LIKE 'ê'", 0)
    check("'é' LIKE ''", 0)

    # '_' is expected to consume one whole character
    check("'é' LIKE '_'", 1)

    # Truncated UTF8 character
    # NOTE(review): in a Python 3 str literal "\xC3" is the code point U+00C3;
    # if the bindings UTF-8 encode the string it reaches the evaluator as a
    # complete two-byte character rather than a lone 0xC3 byte - confirm.
    layer.SetAttributeFilter("'\xC3' LIKE '_'")
    layer.GetFeatureCount()  # we return 1 currently, we could as well return 0...

    check("'éven' LIKE '_ven'", 1)
    check("'éven' LIKE '%ven'", 1)

    # '_' needs a character to consume
    check("'' LIKE '_'", 0)

    # Wildcard followed by a non-matching suffix
    check("'éven' LIKE '_xen'", 0)
    check("'éven' LIKE '%xen'", 0)


def test_ogr_sql_ilike_utf8():
    """Exercise ILIKE with non-ASCII literals on a layer created with
    ADVERTIZE_UTF8=YES.

    The layer holds a single attribute-less feature and every filter compares
    two constants, so the feature count is 1 when the expression is true and
    0 when it is false.
    """

    mem_ds = ogr.GetDriverByName("Memory").CreateDataSource("")
    layer = mem_ds.CreateLayer("test", options=["ADVERTIZE_UTF8=YES"])
    layer.CreateFeature(ogr.Feature(layer.GetLayerDefn()))

    def check(where, expected_count):
        # Install the filter, then compare the number of selected features.
        layer.SetAttributeFilter(where)
        assert layer.GetFeatureCount() == expected_count

    # Every lower/upper case combination of the same accented letter is
    # expected to match with ILIKE
    check("'é' ILIKE 'é'", 1)
    check("'é' ILIKE 'É'", 1)
    check("'É' ILIKE 'é'", 1)
    check("'É' ILIKE 'É'", 1)

    # Case folding must not drop or swap accents, nor match the empty pattern
    check("'é' ILIKE 'e'", 0)
    check("'é' ILIKE 'ê'", 0)
    check("'é' ILIKE ''", 0)

    # '_' is expected to consume one whole character
    check("'é' ILIKE '_'", 1)

    # Truncated UTF8 character
    # NOTE(review): in a Python 3 str literal "\xC3" is the code point U+00C3;
    # if the bindings UTF-8 encode the string it reaches the evaluator as a
    # complete two-byte character rather than a lone 0xC3 byte - confirm.
    layer.SetAttributeFilter("'\xC3' ILIKE '_'")
    layer.GetFeatureCount()  # we return 1 currently, we could as well return 0...

    check("'éven' ILIKE '_ven'", 1)
    check("'éven' ILIKE '%ven'", 1)

    # '_' needs a character to consume
    check("'' ILIKE '_'", 0)

    # Wildcard followed by a non-matching suffix
    check("'éven' ILIKE '_xen'", 0)
    check("'éven' ILIKE '%xen'", 0)
2 changes: 2 additions & 0 deletions ogr/ogr_feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -1833,12 +1833,14 @@ class CPL_DLL OGRGlobFieldDomain final : public OGRFieldDomain
class OGRLayer;
class swq_expr_node;
class swq_custom_func_registrar;
struct swq_evaluation_context;

class CPL_DLL OGRFeatureQuery
{
private:
OGRFeatureDefn *poTargetDefn;
void *pSWQExpr;
swq_evaluation_context *m_psContext = nullptr;

char **FieldCollector(void *, char **);

Expand Down
24 changes: 17 additions & 7 deletions ogr/ogr_swq.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,16 @@ class swq_expr_node;
class swq_select;
class OGRGeometry;

/** Evaluation-time settings handed to swq_expr_node::Evaluate() and forwarded
 * to each operator evaluator (swq_op_evaluator). */
struct CPL_UNSTABLE_API swq_evaluation_context
{
/** Defaults to false. OGRFeatureQuery::Compile() switches it to true when the
 * layer reports the OLCStringsAsUTF8 capability. */
bool bUTF8Strings = false;
};

typedef swq_expr_node *(*swq_field_fetcher)(swq_expr_node *op,
void *record_handle);
typedef swq_expr_node *(*swq_op_evaluator)(swq_expr_node *op,
swq_expr_node **sub_field_values);
typedef swq_expr_node *(*swq_op_evaluator)(
swq_expr_node *op, swq_expr_node **sub_field_values,
const swq_evaluation_context &sContext);
typedef swq_field_type (*swq_op_checker)(
swq_expr_node *op, int bAllowMismatchTypeOnFieldComparison);

Expand All @@ -116,6 +122,7 @@ class swq_custom_func_registrar;
class CPL_UNSTABLE_API swq_expr_node
{
swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record,
const swq_evaluation_context &sContext,
int nRecLevel);
void reset();

Expand Down Expand Up @@ -146,7 +153,8 @@ class CPL_UNSTABLE_API swq_expr_node
int bAllowMismatchTypeOnFieldComparison,
swq_custom_func_registrar *poCustomFuncRegistrar,
int depth = 0);
swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record);
swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record,
const swq_evaluation_context &sContext);
swq_expr_node *Clone();

void ReplaceBetweenByGEAndLERecurse();
Expand Down Expand Up @@ -277,12 +285,14 @@ swq_expr_compile2(const char *where_clause, swq_field_list *field_list,
*/
int CPL_UNSTABLE_API swq_test_like(const char *input, const char *pattern);

swq_expr_node CPL_UNSTABLE_API *SWQGeneralEvaluator(swq_expr_node *,
swq_expr_node **);
swq_expr_node CPL_UNSTABLE_API *
SWQGeneralEvaluator(swq_expr_node *, swq_expr_node **,
const swq_evaluation_context &sContext);
swq_field_type CPL_UNSTABLE_API
SWQGeneralChecker(swq_expr_node *node, int bAllowMismatchTypeOnFieldComparison);
swq_expr_node CPL_UNSTABLE_API *SWQCastEvaluator(swq_expr_node *,
swq_expr_node **);
swq_expr_node CPL_UNSTABLE_API *
SWQCastEvaluator(swq_expr_node *, swq_expr_node **,
const swq_evaluation_context &sContext);
swq_field_type CPL_UNSTABLE_API
SWQCastChecker(swq_expr_node *node, int bAllowMismatchTypeOnFieldComparison);
const char CPL_UNSTABLE_API *SWQFieldTypeToString(swq_field_type field_type);
Expand Down
9 changes: 7 additions & 2 deletions ogr/ogrfeaturequery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ const swq_field_type SpecialFieldTypes[SPECIAL_FIELD_COUNT] = {
/* OGRFeatureQuery() */
/************************************************************************/

OGRFeatureQuery::OGRFeatureQuery() : poTargetDefn(nullptr), pSWQExpr(nullptr)
OGRFeatureQuery::OGRFeatureQuery()
: poTargetDefn(nullptr), pSWQExpr(nullptr),
m_psContext(new swq_evaluation_context())
{
}

Expand All @@ -70,6 +72,7 @@ OGRFeatureQuery::OGRFeatureQuery() : poTargetDefn(nullptr), pSWQExpr(nullptr)
OGRFeatureQuery::~OGRFeatureQuery()

{
// Release the evaluation context allocated with new in the constructor.
delete m_psContext;
// pSWQExpr is stored as void *; cast it back to swq_expr_node so that delete
// operates on the expression's actual type.
delete static_cast<swq_expr_node *>(pSWQExpr);
}

Expand All @@ -83,6 +86,8 @@ OGRFeatureQuery::Compile(OGRLayer *poLayer, const char *pszExpression,
swq_custom_func_registrar *poCustomFuncRegistrar)

{
if (poLayer->TestCapability(OLCStringsAsUTF8))
m_psContext->bUTF8Strings = true;
return Compile(poLayer, poLayer->GetLayerDefn(), pszExpression, bCheck,
poCustomFuncRegistrar);
}
Expand Down Expand Up @@ -318,7 +323,7 @@ int OGRFeatureQuery::Evaluate(OGRFeature *poFeature)
return FALSE;

swq_expr_node *poResult = static_cast<swq_expr_node *>(pSWQExpr)->Evaluate(
OGRFeatureFetcher, poFeature);
OGRFeatureFetcher, poFeature, *m_psContext);

if (poResult == nullptr)
return FALSE;
Expand Down
5 changes: 3 additions & 2 deletions ogr/ogrsf_frmts/generic/ogr_gensql.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,7 @@ OGRFeature *OGRGenSQLResultsLayer::TranslateFeature(OGRFeature *poSrcFeat)
/* -------------------------------------------------------------------- */
int iRegularField = 0;
int iGeomField = 0;
swq_evaluation_context sContext;
for (int iField = 0; iField < psSelectInfo->result_columns(); iField++)
{
swq_col_def *psColDef = &psSelectInfo->column_defs[iField];
Expand All @@ -1380,8 +1381,8 @@ OGRFeature *OGRGenSQLResultsLayer::TranslateFeature(OGRFeature *poSrcFeat)
continue;
}

swq_expr_node *poResult =
psColDef->expr->Evaluate(OGRMultiFeatureFetcher, &apoFeatures);
swq_expr_node *poResult = psColDef->expr->Evaluate(
OGRMultiFeatureFetcher, &apoFeatures, sContext);

if (poResult == nullptr)
{
Expand Down
15 changes: 9 additions & 6 deletions ogr/swq_expr_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,14 +816,17 @@ swq_expr_node *swq_expr_node::Clone()
/************************************************************************/

swq_expr_node *swq_expr_node::Evaluate(swq_field_fetcher pfnFetcher,
void *pRecord)
void *pRecord,
const swq_evaluation_context &sContext)

{
return Evaluate(pfnFetcher, pRecord, 0);
return Evaluate(pfnFetcher, pRecord, sContext, 0);
}

swq_expr_node *swq_expr_node::Evaluate(swq_field_fetcher pfnFetcher,
void *pRecord, int nRecLevel)
void *pRecord,
const swq_evaluation_context &sContext,
int nRecLevel)

{
swq_expr_node *poRetNode = nullptr;
Expand Down Expand Up @@ -868,8 +871,8 @@ swq_expr_node *swq_expr_node::Evaluate(swq_field_fetcher pfnFetcher,
}
else
{
swq_expr_node *poSubExprVal =
papoSubExpr[i]->Evaluate(pfnFetcher, pRecord, nRecLevel + 1);
swq_expr_node *poSubExprVal = papoSubExpr[i]->Evaluate(
pfnFetcher, pRecord, sContext, nRecLevel + 1);
if (poSubExprVal == nullptr)
bError = true;
else
Expand Down Expand Up @@ -901,7 +904,7 @@ swq_expr_node *swq_expr_node::Evaluate(swq_field_fetcher pfnFetcher,
poRetNode = nullptr;
}
else
poRetNode = poOp->pfnEvaluator(this, &(apoValues[0]));
poRetNode = poOp->pfnEvaluator(this, &(apoValues[0]), sContext);
}

/* -------------------------------------------------------------------- */
Expand Down
Loading

0 comments on commit ed51608

Please sign in to comment.