diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py
index 576cecf01d9c..8639e2612ea2 100755
--- a/autotest/ogr/ogr_parquet.py
+++ b/autotest/ogr/ogr_parquet.py
@@ -3283,3 +3283,24 @@ def test_ogr_parquet_write_to_mem(tmp_vsimem, where):
"dict",
) and "nan" not in str(src_f.GetField(j)):
assert src_f.GetField(j) == f.GetField(j), field_name
+
+
+###############################################################################
+# Test writing layer metadata (default, json: and xml: domains) and reading
+# it back
+
+
+@gdaltest.enable_exceptions()
+def test_ogr_parquet_metadata(tmp_vsimem):
+    parquet_path = str(tmp_vsimem / "test_ogr_parquet_metadata.parquet")
+    json_md = '{"foo":["bar","baz"]}'
+
+    # Write a geometry-less layer carrying metadata in three domains
+    ds = ogr.GetDriverByName("Parquet").CreateDataSource(parquet_path)
+    lyr = ds.CreateLayer("test", geom_type=ogr.wkbNone)
+    lyr.SetMetadataItem("foo", "bar")
+    lyr.SetMetadata([json_md], "json:test")
+    lyr.SetMetadata([""], "xml:test")
+    # Drop the dataset reference before re-opening the file
+    ds = None
+
+    # Re-open and check that each domain comes back unchanged
+    ds = ogr.Open(parquet_path)
+    lyr = ds.GetLayer(0)
+    assert lyr.GetMetadata_Dict() == {"foo": "bar"}
+    assert lyr.GetMetadata_List("json:test")[0] == json_md
+    assert lyr.GetMetadata_List("xml:test")[0] == ""
diff --git a/doc/source/drivers/vector/parquet.rst b/doc/source/drivers/vector/parquet.rst
index 192613092e18..2f73d011306e 100644
--- a/doc/source/drivers/vector/parquet.rst
+++ b/doc/source/drivers/vector/parquet.rst
@@ -18,7 +18,7 @@ This driver also supports geometry columns using the GeoParquet specification.
.. note:: The driver should be considered experimental as the GeoParquet specification is not finalized yet.
-The GeoParquet 1.0.0-beta1 specification is supported since GDAL 3.6.2
+The GeoParquet 1.0.0 specification is supported since GDAL 3.8.0
Driver capabilities
-------------------
@@ -131,6 +131,14 @@ if the driver is built against the ``arrowdataset`` C++ library.
Note that no optimization is currently done regarding filtering.
+Metadata
+--------
+
+.. versionadded:: 3.9.0
+
+Layer metadata can be read and written. It is serialized as a JSON document
+stored under the ``gdal:metadata`` key of the file key/value metadata.
+
Multithreading
--------------
diff --git a/ogr/ogrsf_frmts/arrow/ogrfeatherlayer.cpp b/ogr/ogrsf_frmts/arrow/ogrfeatherlayer.cpp
index 7b56302f4b2d..f503e2af2e4e 100644
--- a/ogr/ogrsf_frmts/arrow/ogrfeatherlayer.cpp
+++ b/ogr/ogrsf_frmts/arrow/ogrfeatherlayer.cpp
@@ -162,7 +162,7 @@ void OGRFeatherLayer::EstablishFeatureDefn()
LoadGeoMetadata(kv_metadata.get(), "geo");
}
const auto oMapFieldNameToGDALSchemaFieldDefn =
- LoadGDALMetadata(kv_metadata.get());
+ LoadGDALSchema(kv_metadata.get());
const auto fields = m_poSchema->fields();
for (int i = 0; i < m_poSchema->num_fields(); ++i)
diff --git a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h
index 3e61cc9278e2..fc900d5a59ca 100644
--- a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h
+++ b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h
@@ -148,7 +148,9 @@ class OGRArrowLayer CPL_NON_FINAL
std::vector m_asAttributeFilterConstraints{};
std::map>
- LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata);
+ LoadGDALSchema(const arrow::KeyValueMetadata *kv_metadata);
+
+ void LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata);
OGRArrowLayer(OGRArrowDataset *poDS, const char *pszLayerName);
diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp
index 23eccc5d8203..94e7dec49680 100644
--- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp
+++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp
@@ -77,11 +77,11 @@ inline OGRArrowLayer::~OGRArrowLayer()
}
/************************************************************************/
-/* LoadGDALMetadata() */
+/* LoadGDALSchema() */
/************************************************************************/
inline std::map>
-OGRArrowLayer::LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata)
+OGRArrowLayer::LoadGDALSchema(const arrow::KeyValueMetadata *kv_metadata)
{
std::map>
oMapFieldNameToGDALSchemaFieldDefn;
@@ -164,6 +164,62 @@ OGRArrowLayer::LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata)
return oMapFieldNameToGDALSchemaFieldDefn;
}
+/************************************************************************/
+/* LoadGDALMetadata() */
+/************************************************************************/
+
+/** Restore layer metadata from the "gdal:metadata" key/value entry.
+ *
+ * The entry is parsed as a JSON document. Each child of the root is handled
+ * as one metadata domain, named after the child key:
+ * - name starting with "json:" and an object value: the object is
+ *   re-serialized and stored as the single string of the domain;
+ * - name starting with "xml:" and a string value: the string is stored as
+ *   the single string of the domain;
+ * - anything else: every string-typed child is stored as a key/value item
+ *   of the domain, children of other types are skipped.
+ *
+ * Nothing is set when kv_metadata is null, when the entry is missing or
+ * cannot be fetched, or when its content cannot be loaded as JSON.
+ *
+ * @param kv_metadata key/value metadata to read from. May be null.
+ */
+inline void
+OGRArrowLayer::LoadGDALMetadata(const arrow::KeyValueMetadata *kv_metadata)
+{
+    if (!kv_metadata || !kv_metadata->Contains("gdal:metadata"))
+        return;
+
+    auto gdalMetadata = kv_metadata->Get("gdal:metadata");
+    if (!gdalMetadata.ok())
+        return;
+
+    CPLJSONDocument oDoc;
+    if (!oDoc.LoadMemory(*gdalMetadata))
+        return;
+
+    // Store osContent as the one and only string of domain osDomainName
+    const auto SetSingleStringDomain =
+        [this](const std::string &osDomainName, const std::string &osContent)
+    {
+        char **papszMD = CSLAddString(nullptr, osContent.c_str());
+        SetMetadata(papszMD, osDomainName.c_str());
+        CSLDestroy(papszMD);
+    };
+
+    auto oRoot = oDoc.GetRoot();
+    for (auto oDomain : oRoot.GetChildren())
+    {
+        const std::string osDomainName = oDomain.GetName();
+        const auto eDomainType = oDomain.GetType();
+
+        if (STARTS_WITH(osDomainName.c_str(), "json:") &&
+            eDomainType == CPLJSONObject::Type::Object)
+        {
+            SetSingleStringDomain(
+                osDomainName,
+                oDomain.Format(CPLJSONObject::PrettyFormat::Plain));
+        }
+        else if (STARTS_WITH(osDomainName.c_str(), "xml:") &&
+                 eDomainType == CPLJSONObject::Type::String)
+        {
+            SetSingleStringDomain(osDomainName, oDomain.ToString());
+        }
+        else
+        {
+            // Regular domain: only string values become metadata items
+            for (auto oItem : oDomain.GetChildren())
+            {
+                if (oItem.GetType() != CPLJSONObject::Type::String)
+                    continue;
+                SetMetadataItem(oItem.GetName().c_str(),
+                                oItem.ToString().c_str(),
+                                osDomainName.c_str());
+            }
+        }
+    }
+}
+
/************************************************************************/
/* IsIntegerArrowType() */
/************************************************************************/
diff --git a/ogr/ogrsf_frmts/parquet/ogr_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_parquet.h
index 1500f3ece214..2744f4e29c2b 100644
--- a/ogr/ogrsf_frmts/parquet/ogr_parquet.h
+++ b/ogr/ogrsf_frmts/parquet/ogr_parquet.h
@@ -239,11 +239,14 @@ class OGRParquetDataset final : public OGRArrowDataset
/* OGRParquetWriterLayer */
/************************************************************************/
+class OGRParquetWriterDataset;
+
class OGRParquetWriterLayer final : public OGRArrowWriterLayer
{
OGRParquetWriterLayer(const OGRParquetWriterLayer &) = delete;
OGRParquetWriterLayer &operator=(const OGRParquetWriterLayer &) = delete;
+ OGRParquetWriterDataset *m_poDataset = nullptr;
std::unique_ptr m_poFileWriter{};
std::shared_ptr m_poKeyValueMetadata{};
bool m_bForceCounterClockwiseOrientation = false;
@@ -282,12 +285,14 @@ class OGRParquetWriterLayer final : public OGRArrowWriterLayer
public:
OGRParquetWriterLayer(
- arrow::MemoryPool *poMemoryPool,
+ OGRParquetWriterDataset *poDS, arrow::MemoryPool *poMemoryPool,
const std::shared_ptr &poOutputStream,
const char *pszLayerName);
~OGRParquetWriterLayer() override;
+ CPLErr SetMetadata(char **papszMetadata, const char *pszDomain) override;
+
bool SetOptions(CSLConstList papszOptions,
const OGRSpatialReference *poSpatialRef,
OGRwkbGeometryType eGType);
@@ -356,6 +361,11 @@ class OGRParquetWriterDataset final : public GDALPamDataset
bool AddFieldDomain(std::unique_ptr &&domain,
std::string &failureReason) override;
+ // Return a mutable reference to the dataset-level multi-domain metadata
+ // container (oMDMD). The writer layer falls back to it when the layer
+ // itself has no metadata domain.
+ GDALMultiDomainMetadata &GetMultiDomainMetadata()
+ {
+ return oMDMD;
+ }
+
protected:
OGRLayer *ICreateLayer(const char *pszName,
const OGRSpatialReference *poSpatialRef = nullptr,
diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp
index be1d1b14c21b..b43c246d1703 100644
--- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp
+++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp
@@ -65,7 +65,9 @@ void OGRParquetDatasetLayer::EstablishFeatureDefn()
LoadGeoMetadata(kv_metadata);
const auto oMapFieldNameToGDALSchemaFieldDefn =
- LoadGDALMetadata(kv_metadata.get());
+ LoadGDALSchema(kv_metadata.get());
+
+ LoadGDALMetadata(kv_metadata.get());
const auto fields = m_poSchema->fields();
for (int i = 0; i < m_poSchema->num_fields(); ++i)
diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp
index aaa74958b38d..ae65d7370c29 100644
--- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp
+++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp
@@ -448,7 +448,9 @@ void OGRParquetLayer::EstablishFeatureDefn()
LoadGeoMetadata(kv_metadata);
const auto oMapFieldNameToGDALSchemaFieldDefn =
- LoadGDALMetadata(kv_metadata.get());
+ LoadGDALSchema(kv_metadata.get());
+
+ LoadGDALMetadata(kv_metadata.get());
if (!m_poArrowReader->GetSchema(&m_poSchema).ok())
{
diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetwriterdataset.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetwriterdataset.cpp
index 63bc554985d1..2ee361310384 100644
--- a/ogr/ogrsf_frmts/parquet/ogrparquetwriterdataset.cpp
+++ b/ogr/ogrsf_frmts/parquet/ogrparquetwriterdataset.cpp
@@ -87,7 +87,7 @@ OGRLayer *OGRParquetWriterDataset::ICreateLayer(
return nullptr;
}
m_poLayer = std::make_unique(
- m_poMemoryPool.get(), m_poOutputStream, pszName);
+ this, m_poMemoryPool.get(), m_poOutputStream, pszName);
if (!m_poLayer->SetOptions(papszOptions, poSpatialRef, eGType))
{
m_poLayer.reset();
diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp
index e245a19d7023..3a81ae86640d 100644
--- a/ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp
+++ b/ogr/ogrsf_frmts/parquet/ogrparquetwriterlayer.cpp
@@ -40,10 +40,11 @@
/************************************************************************/
OGRParquetWriterLayer::OGRParquetWriterLayer(
- arrow::MemoryPool *poMemoryPool,
+ OGRParquetWriterDataset *poDataset, arrow::MemoryPool *poMemoryPool,
const std::shared_ptr &poOutputStream,
const char *pszLayerName)
- : OGRArrowWriterLayer(poMemoryPool, poOutputStream, pszLayerName)
+ : OGRArrowWriterLayer(poMemoryPool, poOutputStream, pszLayerName),
+ m_poDataset(poDataset)
{
m_bWriteFieldArrowExtensionName = CPLTestBool(
CPLGetConfigOption("OGR_PARQUET_WRITE_ARROW_EXTENSION_NAME", "NO"));
@@ -567,6 +568,59 @@ void OGRParquetWriterLayer::PerformStepsBeforeFinalFlushGroup()
->Append(kArrowSchemaKey, schema_base64);
}
}
+
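+ // Serialize GDAL metadata as JSON under the gdal:metadata key. Layer
+ // metadata is used when the layer has at least one domain; otherwise
+ // the dataset metadata is used.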
+ CPLJSONObject oMultiMetadata;
+ bool bHasMultiMetadata = false;
+ auto &l_oMDMD = oMDMD.GetDomainList() && *(oMDMD.GetDomainList())
+ ? oMDMD
+ : m_poDataset->GetMultiDomainMetadata();
+ for (CSLConstList papszDomainIter = l_oMDMD.GetDomainList();
+ papszDomainIter && *papszDomainIter; ++papszDomainIter)
+ {
+ const char *pszDomain = *papszDomainIter;
+ CSLConstList papszMD = l_oMDMD.GetMetadata(pszDomain);
+ if (STARTS_WITH(pszDomain, "json:") && papszMD && papszMD[0])
+ {
+ CPLJSONDocument oDoc;
+ if (oDoc.LoadMemory(papszMD[0]))
+ {
+ bHasMultiMetadata = true;
+ oMultiMetadata.Add(pszDomain, oDoc.GetRoot());
+ continue;
+ }
+ }
+ else if (STARTS_WITH(pszDomain, "xml:") && papszMD && papszMD[0])
+ {
+ bHasMultiMetadata = true;
+ oMultiMetadata.Add(pszDomain, papszMD[0]);
+ continue;
+ }
+ CPLJSONObject oMetadata;
+ bool bHasMetadata = false;
+ for (CSLConstList papszMDIter = papszMD;
+ papszMDIter && *papszMDIter; ++papszMDIter)
+ {
+ char *pszKey = nullptr;
+ const char *pszValue = CPLParseNameValue(*papszMDIter, &pszKey);
+ if (pszKey && pszValue)
+ {
+ bHasMetadata = true;
+ bHasMultiMetadata = true;
+ oMetadata.Add(pszKey, pszValue);
+ }
+ CPLFree(pszKey);
+ }
+ if (bHasMetadata)
+ oMultiMetadata.Add(pszDomain, oMetadata);
+ }
+ if (bHasMultiMetadata)
+ {
+ const_cast(m_poKeyValueMetadata.get())
+ ->Append(
+ "gdal:metadata",
+ oMultiMetadata.Format(CPLJSONObject::PrettyFormat::Plain));
+ }
}
}
@@ -818,3 +872,17 @@ bool OGRParquetWriterLayer::IsArrowSchemaSupported(
return true;
}
#endif
+
+/************************************************************************/
+/* SetMetadata() */
+/************************************************************************/
+
+/** Set the metadata of a domain, except for the "SHAPEFILE" domain.
+ *
+ * @param papszMetadata metadata strings to store.
+ * @param pszDomain domain name, or nullptr for the default domain.
+ * @return CE_None when the domain is "SHAPEFILE" (case-insensitive), in
+ * which case nothing is stored; otherwise the result of
+ * OGRLayer::SetMetadata().
+ */
+CPLErr OGRParquetWriterLayer::SetMetadata(char **papszMetadata,
+                                          const char *pszDomain)
+{
+    // Silently accept, but do not record, the SHAPEFILE domain
+    if (pszDomain && EQUAL(pszDomain, "SHAPEFILE"))
+        return CE_None;
+
+    return OGRLayer::SetMetadata(papszMetadata, pszDomain);
+}