From 05f2068d7744ee0fbc293557bc372fea7af83497 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 28 Nov 2023 14:52:06 +0100 Subject: [PATCH] ogr2ogr: fix GPKG -> Shapefile when field names are truncated (fix #8849, 3.8.0 regression) --- autotest/utilities/test_ogr2ogr_lib.py | 43 ++++++++++++++ ogr/ogrsf_frmts/generic/ogrlayer.cpp | 6 +- ogr/ogrsf_frmts/generic/ogrlayer_private.h | 50 +++++++++++++++++ ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp | 65 ++++++++++++++++------ 4 files changed, 141 insertions(+), 23 deletions(-) create mode 100644 ogr/ogrsf_frmts/generic/ogrlayer_private.h diff --git a/autotest/utilities/test_ogr2ogr_lib.py b/autotest/utilities/test_ogr2ogr_lib.py index 15498e1f27a7..666ffce009d2 100755 --- a/autotest/utilities/test_ogr2ogr_lib.py +++ b/autotest/utilities/test_ogr2ogr_lib.py @@ -2406,3 +2406,46 @@ def my_handler(errorClass, errno, msg): f = out_lyr.GetNextFeature() assert f.GetFID() == 1 assert f["foo"] == "baz" + + +############################################################################### + + +@pytest.mark.require_driver("GPKG") +def test_ogr2ogr_lib_gpkg_to_shp_truncated_field_names(tmp_vsimem): + + src_filename = str( + tmp_vsimem / "test_ogr2ogr_lib_gpkg_to_shp_truncated_field_names.gpkg" + ) + src_ds = gdal.GetDriverByName("GPKG").Create( + src_filename, 0, 0, 0, gdal.GDT_Unknown + ) + src_lyr = src_ds.CreateLayer("test") + src_lyr.CreateField(ogr.FieldDefn("shortname")) + src_lyr.CreateField(ogr.FieldDefn("too_long_for_shapefile")) + f = ogr.Feature(src_lyr.GetLayerDefn()) + f.SetField("shortname", "foo") + f.SetField("too_long_for_shapefile", "bar") + src_lyr.CreateFeature(f) + src_ds.Close() + + out_filename = str( + tmp_vsimem / "test_ogr2ogr_lib_gpkg_to_shp_truncated_field_names.shp" + ) + + got_msg = [] + + def my_handler(errorClass, errno, msg): + if errorClass != gdal.CE_Debug: + got_msg.append(msg) + return + + with gdaltest.error_handler(my_handler): + out_ds = gdal.VectorTranslate(out_filename, src_filename) + assert got_msg == [ + "Normalized/laundered field name: 'too_long_for_shapefile' to 'too_long_f'" + ] + out_lyr = out_ds.GetLayer(0) + f = out_lyr.GetNextFeature() + assert f["shortname"] == "foo" + assert f["too_long_f"] == "bar" diff --git a/ogr/ogrsf_frmts/generic/ogrlayer.cpp b/ogr/ogrsf_frmts/generic/ogrlayer.cpp index cddc026285f0..195218ba55f0 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayer.cpp +++ b/ogr/ogrsf_frmts/generic/ogrlayer.cpp @@ -34,17 +34,13 @@ #include "ogr_swq.h" #include "ograpispy.h" #include "ogr_wkb.h" +#include "ogrlayer_private.h" #include "cpl_time.h" #include #include #include -struct OGRLayer::Private -{ - bool m_bInFeatureIterator = false; -}; - /************************************************************************/ /* OGRLayer() */ /************************************************************************/ diff --git a/ogr/ogrsf_frmts/generic/ogrlayer_private.h b/ogr/ogrsf_frmts/generic/ogrlayer_private.h new file mode 100644 index 000000000000..42bb72bcbb9e --- /dev/null +++ b/ogr/ogrsf_frmts/generic/ogrlayer_private.h @@ -0,0 +1,50 @@ +/****************************************************************************** + * + * Project: OpenGIS Simple Features Reference Implementation + * Purpose: OGRLayer::Private struct + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2024, Even Rouault + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + ****************************************************************************/ + +#ifndef OGRLAYER_PRIVATE_H_INCLUDED +#define OGRLAYER_PRIVATE_H_INCLUDED + +#include "ogrsf_frmts.h" + +//! @cond Doxygen_Suppress +struct OGRLayer::Private +{ + bool m_bInFeatureIterator = false; + + // Used by CreateFieldFromArrowSchema() and WriteArrowBatch() + // to store the mapping between the input Arrow field name and the + // output OGR field name, that can be different sometimes (for example + // Shapefile truncating at 10 characters) + // This is admitedly not super clean to store that mapping at that level. + // We should probably have CreateFieldFromArrowSchema() and + // WriteArrowBatch() explictly returning and accepting that mapping. + std::map m_oMapArrowFieldNameToOGRFieldName{}; +}; +//! @endcond + +#endif /* OGRLAYER_PRIVATE_H_INCLUDED */ diff --git a/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp b/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp index 489a7a0222b2..d15e75fd30c1 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp +++ b/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp @@ -35,6 +35,7 @@ #include "ogr_swq.h" #include "ogr_wkb.h" #include "ogr_p.h" +#include "ogrlayer_private.h" #include "cpl_float.h" #include "cpl_json.h" @@ -5873,7 +5874,8 @@ bool OGRLayer::CreateFieldFromArrowSchemaInternal( : OFSTNone; } - OGRFieldDefn oFieldDefn((osFieldPrefix + fieldName).c_str(), eTypeOut); + const std::string osWantedOGRFieldName = osFieldPrefix + fieldName; + OGRFieldDefn oFieldDefn(osWantedOGRFieldName.c_str(), eTypeOut); oFieldDefn.SetSubType(eSubTypeOut); if (eTypeOut == eTypeIn && eSubTypeOut == eSubTypeIn) { @@ -5927,7 +5929,22 @@ bool OGRLayer::CreateFieldFromArrowSchemaInternal( } } } - return CreateField(&oFieldDefn) == OGRERR_NONE; + auto poLayerDefn = GetLayerDefn(); + const int nFieldCountBefore = poLayerDefn->GetFieldCount(); + if (CreateField(&oFieldDefn) != OGRERR_NONE || + nFieldCountBefore + 1 != poLayerDefn->GetFieldCount()) + { + return false; + } + const char *pszActualFieldName = + poLayerDefn->GetFieldDefn(nFieldCountBefore)->GetNameRef(); + if (pszActualFieldName != osWantedOGRFieldName) + { + m_poPrivate + ->m_oMapArrowFieldNameToOGRFieldName[osWantedOGRFieldName] = + pszActualFieldName; + } + return true; }; for (const auto &sType : gasArrowTypesToOGR) @@ -6158,15 +6175,15 @@ struct FieldInfo int nScale = 0; // only used for decimal fields }; -static bool -BuildOGRFieldInfo(const struct ArrowSchema *schema, struct ArrowArray *array, - const OGRFeatureDefn *poFeatureDefn, - const std::string &osFieldPrefix, - const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed, - std::vector &asFieldInfo, const char *pszFIDName, - const char *pszGeomFieldName, OGRLayer *poLayer, - const struct ArrowSchema *&schemaFIDColumn, - struct ArrowArray *&arrayFIDColumn) +static bool BuildOGRFieldInfo( + const struct ArrowSchema *schema, struct ArrowArray *array, + const OGRFeatureDefn *poFeatureDefn, const std::string &osFieldPrefix, + const CPLStringList &aosNativeTypes, bool &bFallbackTypesUsed, + std::vector &asFieldInfo, const char *pszFIDName, + const char *pszGeomFieldName, OGRLayer *poLayer, + const std::map &oMapArrowFieldNameToOGRFieldName, + const struct ArrowSchema *&schemaFIDColumn, + struct ArrowArray *&arrayFIDColumn) { const char *fieldName = schema->name; const char *format = schema->format; @@ -6178,8 +6195,9 @@ BuildOGRFieldInfo(const struct ArrowSchema *schema, struct ArrowArray *array, if (!BuildOGRFieldInfo(schema->children[i], array->children[i], poFeatureDefn, osNewPrefix, aosNativeTypes, bFallbackTypesUsed, asFieldInfo, pszFIDName, - pszGeomFieldName, poLayer, schemaFIDColumn, - arrayFIDColumn)) + pszGeomFieldName, poLayer, + oMapArrowFieldNameToOGRFieldName, + schemaFIDColumn, arrayFIDColumn)) { return false; } @@ -6227,7 +6245,17 @@ BuildOGRFieldInfo(const struct ArrowSchema *schema, struct ArrowArray *array, } else { - sInfo.iOGRFieldIdx = poFeatureDefn->GetFieldIndex(sInfo.osName.c_str()); + const auto osExpectedOGRFieldName = + [&oMapArrowFieldNameToOGRFieldName, &sInfo]() + { + const auto oIter = + oMapArrowFieldNameToOGRFieldName.find(sInfo.osName); + if (oIter != oMapArrowFieldNameToOGRFieldName.end()) + return oIter->second; + return sInfo.osName; + }(); + sInfo.iOGRFieldIdx = + poFeatureDefn->GetFieldIndex(osExpectedOGRFieldName.c_str()); if (sInfo.iOGRFieldIdx >= 0) { bool bTypeOK = false; @@ -6494,8 +6522,8 @@ BuildOGRFieldInfo(const struct ArrowSchema *schema, struct ArrowArray *array, } else { - sInfo.iOGRFieldIdx = - poFeatureDefn->GetGeomFieldIndex(sInfo.osName.c_str()); + sInfo.iOGRFieldIdx = poFeatureDefn->GetGeomFieldIndex( + osExpectedOGRFieldName.c_str()); if (sInfo.iOGRFieldIdx < 0) { if (pszGeomFieldName && pszGeomFieldName == sInfo.osName) @@ -7266,8 +7294,9 @@ bool OGRLayer::WriteArrowBatch(const struct ArrowSchema *schema, if (!BuildOGRFieldInfo(schema->children[i], array->children[i], poLayerDefn, std::string(), aosNativeTypes, bFallbackTypesUsed, asFieldInfo, pszFIDName, - pszGeomFieldName, this, schemaFIDColumn, - arrayFIDColumn)) + pszGeomFieldName, this, + m_poPrivate->m_oMapArrowFieldNameToOGRFieldName, + schemaFIDColumn, arrayFIDColumn)) { return false; }