diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py index 576cecf01d9c..8639e2612ea2 100755 --- a/autotest/ogr/ogr_parquet.py +++ b/autotest/ogr/ogr_parquet.py @@ -3283,3 +3283,24 @@ def test_ogr_parquet_write_to_mem(tmp_vsimem, where): "dict", ) and "nan" not in str(src_f.GetField(j)): assert src_f.GetField(j) == f.GetField(j), field_name + + +############################################################################### + + +@gdaltest.enable_exceptions() +def test_ogr_parquet_metadata(tmp_vsimem): + + outfilename = str(tmp_vsimem / "test_ogr_parquet_metadata.parquet") + ds = ogr.GetDriverByName("Parquet").CreateDataSource(outfilename) + lyr = ds.CreateLayer("test", geom_type=ogr.wkbNone) + lyr.SetMetadataItem("foo", "bar") + lyr.SetMetadata(['{"foo":["bar","baz"]}'], "json:test") + lyr.SetMetadata([""], "xml:test") + ds = None + + ds = ogr.Open(outfilename) + lyr = ds.GetLayer(0) + assert lyr.GetMetadata_Dict() == {"foo": "bar"} + assert lyr.GetMetadata_List("json:test")[0] == '{"foo":["bar","baz"]}' + assert lyr.GetMetadata_List("xml:test")[0] == "" diff --git a/doc/source/drivers/vector/parquet.rst b/doc/source/drivers/vector/parquet.rst index 192613092e18..2f73d011306e 100644 --- a/doc/source/drivers/vector/parquet.rst +++ b/doc/source/drivers/vector/parquet.rst @@ -18,7 +18,7 @@ This driver also supports geometry columns using the GeoParquet specification. .. note:: The driver should be considered experimental as the GeoParquet specification is not finalized yet. -The GeoParquet 1.0.0-beta1 specification is supported since GDAL 3.6.2 +The GeoParquet 1.0.0 specification is supported since GDAL 3.8.0 Driver capabilities ------------------- @@ -131,6 +131,14 @@ if the driver is built against the ``arrowdataset`` C++ library. Note that no optimization is currently done regarding filtering. +Metadata +-------- + +.. versionadded:: 3.9.0 + +Layer metadata can be read and written. It is serialized as JSON content under +the ``gdal:metadata`` key of the file key/value metadata. 
+ Multithreading -------------- diff --git a/ogr/ogrsf_frmts/arrow/ogrfeatherlayer.cpp b/ogr/ogrsf_frmts/arrow/ogrfeatherlayer.cpp index 7b56302f4b2d..f503e2af2e4e 100644 --- a/ogr/ogrsf_frmts/arrow/ogrfeatherlayer.cpp +++ b/ogr/ogrsf_frmts/arrow/ogrfeatherlayer.cpp @@ -162,7 +162,7 @@ void OGRFeatherLayer::EstablishFeatureDefn() LoadGeoMetadata(kv_metadata.get(), "geo"); } const auto oMapFieldNameToGDALSchemaFieldDefn = - LoadGDALMetadata(kv_metadata.get()); + LoadGDALSchema(kv_metadata.get()); const auto fields = m_poSchema->fields(); for (int i = 0; i < m_poSchema->num_fields(); ++i) diff --git a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h index 3e61cc9278e2..fc900d5a59ca 100644 --- a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h +++ b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h @@ -148,7 +148,9 @@ class OGRArrowLayer CPL_NON_FINAL std::vector m_asAttributeFilterConstraints{}; std::map> - LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata); + LoadGDALSchema(const arrow::KeyValueMetadata *kv_metadata); + + void LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata); OGRArrowLayer(OGRArrowDataset *poDS, const char *pszLayerName); diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index 23eccc5d8203..94e7dec49680 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -77,11 +77,11 @@ inline OGRArrowLayer::~OGRArrowLayer() } /************************************************************************/ -/* LoadGDALMetadata() */ +/* LoadGDALSchema() */ /************************************************************************/ inline std::map> -OGRArrowLayer::LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata) +OGRArrowLayer::LoadGDALSchema(const arrow::KeyValueMetadata *kv_metadata) { std::map> oMapFieldNameToGDALSchemaFieldDefn; @@ -164,6 +164,62 @@ OGRArrowLayer::LoadGDALMetadata(const 
arrow::KeyValueMetadata *kv_metadata) return oMapFieldNameToGDALSchemaFieldDefn; } +/************************************************************************/ +/* LoadGDALMetadata() */ +/************************************************************************/ + +inline void +OGRArrowLayer::LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata) +{ + if (kv_metadata && kv_metadata->Contains("gdal:metadata")) + { + auto gdalMetadata = kv_metadata->Get("gdal:metadata"); + if (gdalMetadata.ok()) + { + CPLJSONDocument oDoc; + if (oDoc.LoadMemory(*gdalMetadata)) + { + auto oRoot = oDoc.GetRoot(); + for (auto oDomain : oRoot.GetChildren()) + { + if (STARTS_WITH(oDomain.GetName().c_str(), "json:") && + oDomain.GetType() == CPLJSONObject::Type::Object) + { + char **papszMD = nullptr; + papszMD = CSLAddString( + papszMD, + oDomain.Format(CPLJSONObject::PrettyFormat::Plain) + .c_str()); + SetMetadata(papszMD, oDomain.GetName().c_str()); + CSLDestroy(papszMD); + } + else if (STARTS_WITH(oDomain.GetName().c_str(), "xml:") && + oDomain.GetType() == CPLJSONObject::Type::String) + { + char **papszMD = nullptr; + papszMD = + CSLAddString(papszMD, oDomain.ToString().c_str()); + SetMetadata(papszMD, oDomain.GetName().c_str()); + CSLDestroy(papszMD); + } + else + { + for (auto oItem : oDomain.GetChildren()) + { + if (oItem.GetType() == CPLJSONObject::Type::String) + { + SetMetadataItem(oItem.GetName().c_str(), + oItem.ToString().c_str(), + oDomain.GetName().c_str()); + } + } + } + } + } + } + } +} + /************************************************************************/ /* IsIntegerArrowType() */ /************************************************************************/ diff --git a/ogr/ogrsf_frmts/parquet/ogr_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_parquet.h index 1500f3ece214..2744f4e29c2b 100644 --- a/ogr/ogrsf_frmts/parquet/ogr_parquet.h +++ b/ogr/ogrsf_frmts/parquet/ogr_parquet.h @@ -239,11 +239,14 @@ class OGRParquetDataset final : public OGRArrowDataset /* 
OGRParquetWriterLayer */ /************************************************************************/ +class OGRParquetWriterDataset; + class OGRParquetWriterLayer final : public OGRArrowWriterLayer { OGRParquetWriterLayer(const OGRParquetWriterLayer &) = delete; OGRParquetWriterLayer &operator=(const OGRParquetWriterLayer &) = delete; + OGRParquetWriterDataset *m_poDataset = nullptr; std::unique_ptr m_poFileWriter{}; std::shared_ptr m_poKeyValueMetadata{}; bool m_bForceCounterClockwiseOrientation = false; @@ -282,12 +285,14 @@ class OGRParquetWriterLayer final : public OGRArrowWriterLayer public: OGRParquetWriterLayer( - arrow::MemoryPool *poMemoryPool, + OGRParquetWriterDataset *poDS, arrow::MemoryPool *poMemoryPool, const std::shared_ptr &poOutputStream, const char *pszLayerName); ~OGRParquetWriterLayer() override; + CPLErr SetMetadata(char **papszMetadata, const char *pszDomain) override; + bool SetOptions(CSLConstList papszOptions, const OGRSpatialReference *poSpatialRef, OGRwkbGeometryType eGType); @@ -356,6 +361,11 @@ class OGRParquetWriterDataset final : public GDALPamDataset bool AddFieldDomain(std::unique_ptr &&domain, std::string &failureReason) override; + GDALMultiDomainMetadata &GetMultiDomainMetadata() + { + return oMDMD; + } + protected: OGRLayer *ICreateLayer(const char *pszName, const OGRSpatialReference *poSpatialRef = nullptr, diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index be1d1b14c21b..b43c246d1703 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -65,7 +65,9 @@ void OGRParquetDatasetLayer::EstablishFeatureDefn() LoadGeoMetadata(kv_metadata); const auto oMapFieldNameToGDALSchemaFieldDefn = - LoadGDALMetadata(kv_metadata.get()); + LoadGDALSchema(kv_metadata.get()); + + LoadGDALMetadata(kv_metadata.get()); const auto fields = m_poSchema->fields(); for (int i = 0; i < m_poSchema->num_fields(); ++i) 
diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp index aaa74958b38d..ae65d7370c29 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp @@ -448,7 +448,9 @@ void OGRParquetLayer::EstablishFeatureDefn() LoadGeoMetadata(kv_metadata); const auto oMapFieldNameToGDALSchemaFieldDefn = - LoadGDALMetadata(kv_metadata.get()); + LoadGDALSchema(kv_metadata.get()); + + LoadGDALMetadata(kv_metadata.get()); if (!m_poArrowReader->GetSchema(&m_poSchema).ok()) { diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetwriterdataset.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetwriterdataset.cpp index 63bc554985d1..2ee361310384 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetwriterdataset.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetwriterdataset.cpp @@ -87,7 +87,7 @@ OGRLayer *OGRParquetWriterDataset::ICreateLayer( return nullptr; } m_poLayer = std::make_unique( - m_poMemoryPool.get(), m_poOutputStream, pszName); + this, m_poMemoryPool.get(), m_poOutputStream, pszName); if (!m_poLayer->SetOptions(papszOptions, poSpatialRef, eGType)) { m_poLayer.reset(); diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp index e245a19d7023..3a81ae86640d 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp @@ -40,10 +40,11 @@ /************************************************************************/ OGRParquetWriterLayer::OGRParquetWriterLayer( - arrow::MemoryPool *poMemoryPool, + OGRParquetWriterDataset *poDataset, arrow::MemoryPool *poMemoryPool, const std::shared_ptr &poOutputStream, const char *pszLayerName) - : OGRArrowWriterLayer(poMemoryPool, poOutputStream, pszLayerName) + : OGRArrowWriterLayer(poMemoryPool, poOutputStream, pszLayerName), + m_poDataset(poDataset) { m_bWriteFieldArrowExtensionName = CPLTestBool( CPLGetConfigOption("OGR_PARQUET_WRITE_ARROW_EXTENSION_NAME", "NO")); 
@@ -567,6 +568,59 @@ void OGRParquetWriterLayer::PerformStepsBeforeFinalFlushGroup() ->Append(kArrowSchemaKey, schema_base64); } } + + // Put GDAL metadata into a gdal:metadata domain + CPLJSONObject oMultiMetadata; + bool bHasMultiMetadata = false; + auto &l_oMDMD = oMDMD.GetDomainList() && *(oMDMD.GetDomainList()) + ? oMDMD + : m_poDataset->GetMultiDomainMetadata(); + for (CSLConstList papszDomainIter = l_oMDMD.GetDomainList(); + papszDomainIter && *papszDomainIter; ++papszDomainIter) + { + const char *pszDomain = *papszDomainIter; + CSLConstList papszMD = l_oMDMD.GetMetadata(pszDomain); + if (STARTS_WITH(pszDomain, "json:") && papszMD && papszMD[0]) + { + CPLJSONDocument oDoc; + if (oDoc.LoadMemory(papszMD[0])) + { + bHasMultiMetadata = true; + oMultiMetadata.Add(pszDomain, oDoc.GetRoot()); + continue; + } + } + else if (STARTS_WITH(pszDomain, "xml:") && papszMD && papszMD[0]) + { + bHasMultiMetadata = true; + oMultiMetadata.Add(pszDomain, papszMD[0]); + continue; + } + CPLJSONObject oMetadata; + bool bHasMetadata = false; + for (CSLConstList papszMDIter = papszMD; + papszMDIter && *papszMDIter; ++papszMDIter) + { + char *pszKey = nullptr; + const char *pszValue = CPLParseNameValue(*papszMDIter, &pszKey); + if (pszKey && pszValue) + { + bHasMetadata = true; + bHasMultiMetadata = true; + oMetadata.Add(pszKey, pszValue); + } + CPLFree(pszKey); + } + if (bHasMetadata) + oMultiMetadata.Add(pszDomain, oMetadata); + } + if (bHasMultiMetadata) + { + const_cast(m_poKeyValueMetadata.get()) + ->Append( + "gdal:metadata", + oMultiMetadata.Format(CPLJSONObject::PrettyFormat::Plain)); + } } } @@ -818,3 +872,17 @@ bool OGRParquetWriterLayer::IsArrowSchemaSupported( return true; } #endif + +/************************************************************************/ +/* SetMetadata() */ +/************************************************************************/ + +CPLErr OGRParquetWriterLayer::SetMetadata(char **papszMetadata, + const char *pszDomain) +{ + if (!pszDomain || 
!EQUAL(pszDomain, "SHAPEFILE")) + { + return OGRLayer::SetMetadata(papszMetadata, pszDomain); + } + return CE_None; +}