Skip to content

Commit

Permalink
Merge pull request #8582 from rouault/ogr2ogr_arrow_json
Browse files Browse the repository at this point in the history
ogr2ogr: preserve OFSTJSON when using ArrowArray code path
  • Loading branch information
rouault authored Oct 20, 2023
2 parents 0ac826e + fb92aa5 commit e314bd5
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 6 deletions.
36 changes: 33 additions & 3 deletions apps/ogr2ogr_lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3886,19 +3886,49 @@ bool SetupTargetLayer::CanUseWriteArrowBatch(
poDstFDefn->GetGeomFieldCount() ==
poSrcFDefn->GetGeomFieldCount())
{
std::map<std::string, OGRFieldSubType>
oMapFieldNameToSubType;
for (int i = 0; i < poSrcFDefn->GetFieldCount(); ++i)
{
const auto poSrcFieldDefn =
poSrcFDefn->GetFieldDefn(i);
if (poSrcFieldDefn->GetSubType() == OFSTJSON)
oMapFieldNameToSubType[poSrcFieldDefn
->GetNameRef()] =
poSrcFieldDefn->GetSubType();
}
std::set<std::string> oSetGeomFieldNames;
for (int i = 0; i < poSrcFDefn->GetGeomFieldCount();
++i)
{
const auto poSrcGeomFieldDefn =
poSrcFDefn->GetGeomFieldDefn(i);
oSetGeomFieldNames.insert(
poSrcGeomFieldDefn->GetNameRef());
}

// Create output fields using CreateFieldFromArrowSchema()
for (int i = 0; i < schemaSrc.n_children; ++i)
{
const char *pszFieldName =
schemaSrc.children[i]->name;
CPLStringList aosOptions;
auto oIterSubType =
oMapFieldNameToSubType.find(pszFieldName);
if (oIterSubType != oMapFieldNameToSubType.end())
{
aosOptions.SetNameValue(
"SUBTYPE", OGR_GetFieldSubTypeName(
oIterSubType->second));
}
if (!EQUAL(pszFieldName, "OGC_FID") &&
!EQUAL(pszFieldName, "wkb_geometry") &&
!EQUAL(pszFieldName,
poSrcLayer->GetFIDColumn()) &&
poSrcFDefn->GetGeomFieldIndex(pszFieldName) <
0 &&
oSetGeomFieldNames.find(pszFieldName) ==
oSetGeomFieldNames.end() &&
!poDstLayer->CreateFieldFromArrowSchema(
schemaSrc.children[i], nullptr))
schemaSrc.children[i], aosOptions.List()))
{
CPLError(CE_Failure, CPLE_AppDefined,
"Cannot create field %s",
Expand Down
11 changes: 11 additions & 0 deletions autotest/utilities/test_ogr2ogr_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2123,9 +2123,13 @@ def test_ogr2ogr_lib_OGR2OGR_USE_ARROW_API_YES(limit):
src_ds = gdal.GetDriverByName("Memory").Create("", 0, 0, 0, gdal.GDT_Unknown)
src_lyr = src_ds.CreateLayer("test")
src_lyr.CreateField(ogr.FieldDefn("str_field"))
fld_defn = ogr.FieldDefn("json_field")
fld_defn.SetSubType(ogr.OFSTJSON)
src_lyr.CreateField(fld_defn)
for i in range(2):
f = ogr.Feature(src_lyr.GetLayerDefn())
f["str_field"] = "foo%d" % i
f["json_field"] = '{"foo":"bar"}'
f.SetGeometry(ogr.CreateGeometryFromWkt("POINT (%d 2)" % i))
src_lyr.CreateFeature(f)

Expand All @@ -2148,10 +2152,17 @@ def my_handler(errorClass, errno, msg):
assert "OGR2OGR: Using WriteArrowBatch()" in got_msg

out_lyr = out_ds.GetLayer(0)
assert out_lyr.GetLayerDefn().GetFieldDefn(0).GetName() == "str_field"
assert out_lyr.GetLayerDefn().GetFieldDefn(0).GetType() == ogr.OFTString
assert out_lyr.GetLayerDefn().GetFieldDefn(0).GetSubType() == ogr.OFSTNone
assert out_lyr.GetLayerDefn().GetFieldDefn(1).GetName() == "json_field"
assert out_lyr.GetLayerDefn().GetFieldDefn(1).GetType() == ogr.OFTString
assert out_lyr.GetLayerDefn().GetFieldDefn(1).GetSubType() == ogr.OFSTJSON
assert out_lyr.GetFeatureCount() == (limit if limit else src_lyr.GetFeatureCount())

f = out_lyr.GetNextFeature()
assert f["str_field"] == "foo0"
assert f["json_field"] == '{"foo":"bar"}'
assert f.GetGeometryRef().ExportToIsoWkt() == "POINT (0 2)"

if not limit:
Expand Down
19 changes: 16 additions & 3 deletions ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5020,7 +5020,14 @@ bool OGRLayer::CreateFieldFromArrowSchemaInternal(
{
if (strcmp(format, sType.arrowType) == 0)
{
return AddField(sType.eType, sType.eSubType, 0, 0);
OGRFieldSubType eSubType = sType.eSubType;
if (sType.eType == OFTString &&
EQUAL(CSLFetchNameValueDef(papszOptions, "SUBTYPE", ""),
"JSON"))
{
eSubType = OFSTJSON;
}
return AddField(sType.eType, eSubType, 0, 0);
}
}

Expand Down Expand Up @@ -5139,10 +5146,13 @@ bool OGRLayer::CreateFieldFromArrowSchemaInternal(
*
* This method and CreateField() are mutually exclusive in the same session.
*
* The base implementation of CreateFieldFromArrowSchema() supports the
* option SUBTYPE=JSON for fields of type string.
*
* This method is the same as the C function OGR_L_CreateFieldFromArrowSchema().
*
* @param schema Schema of the field to create.
* @param papszOptions Options (none currently). Null terminated list, or nullptr.
* @param papszOptions Options. Null terminated list, or nullptr.
* @return true in case of success
* @since 3.8
*/
Expand Down Expand Up @@ -5170,11 +5180,14 @@ bool OGRLayer::CreateFieldFromArrowSchema(const struct ArrowSchema *schema,
*
* This method and CreateField() are mutually exclusive in the same session.
*
* The base implementation of CreateFieldFromArrowSchema() supports the
* option SUBTYPE=JSON for fields of type string.
*
* This method is the same as the C++ method OGRLayer::CreateFieldFromArrowSchema().
*
* @param hLayer Layer.
* @param schema Schema of the field to create.
* @param papszOptions Options (none currently). Null terminated list, or nullptr.
* @param papszOptions Options. Null terminated list, or nullptr.
* @return true in case of success
* @since 3.8
*/
Expand Down

0 comments on commit e314bd5

Please sign in to comment.