Skip to content

Commit

Permalink
Multidim API: add GDALGroup::SubsetDimensionFromSelection()
Browse files Browse the repository at this point in the history
Return a virtual group whose one dimension has been subset according to a
selection.

The selection criterion is currently restricted to the form
"/path/to/array=numeric_value" (no spaces around equal)

This is similar to XArray indexing by name and label on a XArray Dataset
using the sel() method.
Cf https://docs.xarray.dev/en/latest/user-guide/indexing.html#quick-overview

For example on a EMIT L2A product
(https://github.com/nasa/EMIT-Data-Resources/blob/main/python/tutorials/Exploring_EMIT_L2A_Reflectance.ipynb),
this can be used to keep only valid bands with
SubsetDimensionFromSelection("/sensor_band_parameters/good_wavelengths=1")
  • Loading branch information
rouault committed Oct 19, 2023
1 parent b1a487f commit daeece5
Show file tree
Hide file tree
Showing 24 changed files with 1,104 additions and 158 deletions.
157 changes: 157 additions & 0 deletions autotest/gcore/multidim.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,3 +811,160 @@ def test_multidim_asclassicsubdataset_band_metadata():
"OTHER_STRING_ATTR": "string_attr_value",
"AUX_VAR": "bar",
}


@gdaltest.enable_exceptions()
def test_multidim_SubsetDimensionFromSelection():

drv = gdal.GetDriverByName("MEM")
mem_ds = drv.CreateMultiDimensional("myds")
rg = mem_ds.GetRootGroup()

numeric_attr = rg.CreateAttribute(
"numeric_attr", [], gdal.ExtendedDataType.Create(gdal.GDT_Float64)
)
assert numeric_attr.Write(1) == gdal.CE_None

subgroup = rg.CreateGroup("band_properties")

dimBand = rg.CreateDimension("band", None, None, 3)
validity = subgroup.CreateMDArray(
"validity", [dimBand], gdal.ExtendedDataType.Create(gdal.GDT_Int32)
)
validity.Write(array.array("i", [1, 0, 1]))
dimBand.SetIndexingVariable(validity)

dimX = rg.CreateDimension("X", None, None, 2)
X = rg.CreateMDArray("X", [dimX], gdal.ExtendedDataType.Create(gdal.GDT_Float64))
X.Write(array.array("d", [10, 20]))
dimX.SetIndexingVariable(X)

dimY = rg.CreateDimension("Y", None, None, 2)

ar = rg.CreateMDArray(
"ar", [dimBand, dimY, dimX], gdal.ExtendedDataType.Create(gdal.GDT_Float64)
)
assert ar.Write(array.array("d", [i for i in range(3 * 2 * 2)])) == gdal.CE_None
ar.SetUnit("foo")
ar.SetNoDataValueDouble(5)
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)
ar.SetSpatialRef(srs)

ar_numeric_attr = ar.CreateAttribute(
"ar_numeric_attr", [], gdal.ExtendedDataType.Create(gdal.GDT_Float64)
)
assert ar_numeric_attr.Write(1) == gdal.CE_None

subgroup = rg.CreateGroup("subgroup")

indexed_twice_by_band = subgroup.CreateMDArray(
"indexed_twice_by_band",
[dimBand, dimX, dimBand],
gdal.ExtendedDataType.Create(gdal.GDT_Float64),
)
assert (
indexed_twice_by_band.Write(array.array("d", [i for i in range(3 * 3 * 2)]))
== gdal.CE_None
)

rg.CreateMDArray("non_numeric_ar", [dimBand], gdal.ExtendedDataType.CreateString())

too_large_dim = rg.CreateDimension(
"too_large_dim", None, None, 10 * 1024 * 1024 + 1
)
rg.CreateMDArray(
"too_large_dim_ar", [too_large_dim], gdal.ExtendedDataType.Create(gdal.GDT_Byte)
)

same_value = rg.CreateMDArray(
"same_value", [dimX], gdal.ExtendedDataType.Create(gdal.GDT_Float64)
)
same_value.Write(array.array("d", [10, 10]))

with pytest.raises(Exception, match="Invalid value for selection"):
rg.SubsetDimensionFromSelection("")
with pytest.raises(Exception, match="Invalid value for selection"):
rg.SubsetDimensionFromSelection("/band_properties/validity")
with pytest.raises(Exception, match="Non-numeric value in selection criterion"):
rg.SubsetDimensionFromSelection("/band_properties/validity='foo'")
with pytest.raises(Exception, match="Cannot find array /i/do/not/exist"):
rg.SubsetDimensionFromSelection("/i/do/not/exist=1")
with pytest.raises(Exception, match="Array /ar is not single dimensional"):
rg.SubsetDimensionFromSelection("/ar=1")
with pytest.raises(Exception, match="Array /non_numeric_ar is not of numeric type"):
rg.SubsetDimensionFromSelection("/non_numeric_ar=1")
with pytest.raises(Exception, match="Too many values in /too_large_dim_ar"):
rg.SubsetDimensionFromSelection("/too_large_dim_ar=1")
with pytest.raises(
Exception, match="No value in /band_properties/validity matching 2.000000"
):
rg.SubsetDimensionFromSelection("/band_properties/validity=2")

# In C++, should return the same object as rg, but we can't easily check
rg.SubsetDimensionFromSelection("/same_value=10")

rg_subset = rg.SubsetDimensionFromSelection("/band_properties/validity=1")
assert rg_subset
dims = rg_subset.GetDimensions()
assert len(dims) == 4
bandFound = False
for dim in dims:
if dim.GetName() == "band":
bandFound = True
assert dim.GetFullName() == "/band"
assert dim.GetSize() == 2
var = dim.GetIndexingVariable()
assert var
assert var.GetName() == "validity"
assert var.GetFullName() == "/band_properties/validity"
assert bandFound

assert rg_subset.GetGroupNames() == rg.GetGroupNames()
with pytest.raises(Exception, match="Group i_do_not_exist does not exist"):
assert rg_subset.OpenGroup("i_do_not_exist") is None
assert len(rg_subset.GetAttributes()) == 1
assert rg_subset.GetAttribute("numeric_attr") is not None

subgroup_subset = rg_subset.OpenGroup("band_properties")
assert subgroup_subset.GetFullName() == "/band_properties"
assert subgroup_subset.GetMDArrayNames() == ["validity"]
validity_subset = subgroup_subset.OpenMDArray("validity")
assert validity_subset.GetFullName() == "/band_properties/validity"
assert validity_subset.Read() == array.array("i", [1, 1])

assert rg_subset.GetMDArrayNames() == rg.GetMDArrayNames()
with pytest.raises(Exception, match="Array i_do_not_exist does not exist"):
assert rg_subset.OpenMDArray("i_do_not_exist") is None
ar_subset = rg_subset.OpenMDArray("ar")
assert ar_subset.GetFullName() == "/ar"
assert [dim.GetFullName() for dim in ar_subset.GetDimensions()] == [
"/band",
"/Y",
"/X",
]
assert ar_subset.GetDataType() == ar.GetDataType()
assert ar_subset.Read() == array.array(
"d", [0.0, 1.0, 2.0, 3.0, 8.0, 9.0, 10.0, 11.0]
)
assert ar_subset.Read(
array_start_idx=[1, 0, 0], array_step=[-1, 1, 1]
) == array.array("d", [8.0, 9.0, 10.0, 11.0, 0.0, 1.0, 2.0, 3.0])
assert len(ar_subset.GetAttributes()) == 1
assert ar_subset.GetAttribute("ar_numeric_attr") is not None
assert ar_subset.GetUnit() == ar.GetUnit()
assert ar_subset.GetSpatialRef().IsSame(ar.GetSpatialRef())
assert ar_subset.GetNoDataValueAsRaw() == ar.GetNoDataValueAsRaw()
assert ar_subset.GetBlockSize() == [0, 1, 0]

subgroup_subset = rg_subset.OpenGroup("subgroup")
indexed_twice_by_band_subset = subgroup_subset.OpenMDArray("indexed_twice_by_band")
assert (
indexed_twice_by_band_subset.GetFullName() == "/subgroup/indexed_twice_by_band"
)
assert indexed_twice_by_band_subset.Read() == array.array(
"d", [0.0, 2.0, 3.0, 5.0, 12.0, 14.0, 15.0, 17.0]
)
assert indexed_twice_by_band_subset.Read(
array_start_idx=[1, 0, 0], array_step=[-1, 1, 1]
) == array.array("d", [12.0, 14.0, 15.0, 17.0, 0.0, 2.0, 3.0, 5.0])
28 changes: 27 additions & 1 deletion autotest/gdrivers/netcdf_multidim.py
Original file line number Diff line number Diff line change
Expand Up @@ -3751,7 +3751,7 @@ def test_netcdf_multidim_serialize_statistics_asclassicdataset(tmp_path):
filename = str(
tmp_path / "test_netcdf_multidim_serialize_statistics_asclassicdataset.nc"
)
shutil.copy("data/netcdf/byte_no_cf.nc", filename)
shutil.copy("data/netcdf/byte.nc", filename)

def test():
ds = gdal.OpenEx(filename, gdal.OF_MULTIDIM_RASTER | gdal.OF_UPDATE)
Expand All @@ -3772,6 +3772,16 @@ def test():
classic_ds = view.AsClassicDataset(1, 0)
classic_ds.GetRasterBand(1).ComputeStatistics(False)

rg_subset = rg.SubsetDimensionFromSelection("/x=440750")
ds = rg_subset.OpenMDArray("Band1").AsClassicDataset(1, 0)
ds.GetRasterBand(1).ComputeStatistics(False)

def test2():
ds = gdal.OpenEx(filename, gdal.OF_MULTIDIM_RASTER | gdal.OF_UPDATE)
rg = ds.GetRootGroup()
rg_subset = rg.SubsetDimensionFromSelection("/x=440750")
rg_subset.OpenMDArray("Band1").GetStatistics(False, force=True)

def reopen():

aux_xml = open(filename + ".aux.xml", "rb").read().decode("UTF-8")
Expand All @@ -3783,6 +3793,11 @@ def reopen():
'<DerivedDataset name="AsClassicDataset(1,0) view of Sliced view of /Band1 ([0:10,...])">'
in aux_xml
)
assert (
'<DerivedDataset name="AsClassicDataset(1,0) view of /Band1 with context Selection /x=440750">'
in aux_xml
)
assert '<Array name="/Band1" context="Selection /x=440750">' in aux_xml

ds = gdal.OpenEx(filename, gdal.OF_MULTIDIM_RASTER)
rg = ds.GetRootGroup()
Expand All @@ -3808,5 +3823,16 @@ def reopen():
-1.0,
]

rg_subset = rg.SubsetDimensionFromSelection("/x=440750")

assert rg_subset.OpenMDArray("Band1").GetStatistics(False, False).min == 107
assert rg_subset.OpenMDArray("Band1").GetStatistics(False, False).max == 197

ds = rg_subset.OpenMDArray("Band1").AsClassicDataset(1, 0)
assert ds.GetRasterBand(1).GetStatistics(False, False) == pytest.approx(
[107.0, 197.0, 149.7, 26.595300336714]
)

test()
test2()
reopen()
15 changes: 13 additions & 2 deletions frmts/hdf4/hdf4multidim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,21 @@ class HDF4Group final : public GDALGroup
std::shared_ptr<HDF4SharedResources> m_poShared;
std::shared_ptr<HDF4SDSGroup> m_poGDALGroup{};

public:
protected:
HDF4Group(const std::string &osParentName, const std::string &osName,
const std::shared_ptr<HDF4SharedResources> &poShared);

public:
static std::shared_ptr<HDF4Group>
Create(const std::string &osParentName, const std::string &osName,
const std::shared_ptr<HDF4SharedResources> &poShared)
{
auto poGroup = std::shared_ptr<HDF4Group>(
new HDF4Group(osParentName, osName, poShared));
poGroup->SetSelf(poGroup);
return poGroup;
}

std::vector<std::shared_ptr<GDALAttribute>>
GetAttributes(CSLConstList papszOptions = nullptr) const override;

Expand Down Expand Up @@ -3190,7 +3201,7 @@ void HDF4Dataset::OpenMultiDim(const char *pszFilename,

hSD = -1;

m_poRootGroup = std::make_shared<HDF4Group>(std::string(), "/", poShared);
m_poRootGroup = HDF4Group::Create(std::string(), "/", poShared);

SetDescription(pszFilename);

Expand Down
22 changes: 17 additions & 5 deletions frmts/hdf5/hdf5multidim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class HDF5Group final : public GDALGroup
static herr_t GetAttributesCallback(hid_t hGroup, const char *pszObjName,
void *);

public:
protected:
HDF5Group(
const std::string &osParentName, const std::string &osName,
const std::shared_ptr<HDF5SharedResources> &poShared,
Expand All @@ -85,6 +85,19 @@ class HDF5Group final : public GDALGroup
}
}

public:
static std::shared_ptr<HDF5Group> Create(
const std::string &osParentName, const std::string &osName,
const std::shared_ptr<HDF5SharedResources> &poShared,
const std::set<std::pair<unsigned long, unsigned long>> &oSetParentIds,
hid_t hGroup, unsigned long objIds[2])
{
auto poGroup = std::shared_ptr<HDF5Group>(new HDF5Group(
osParentName, osName, poShared, oSetParentIds, hGroup, objIds));
poGroup->SetSelf(poGroup);
return poGroup;
}

~HDF5Group()
{
H5Gclose(m_hGroup);
Expand Down Expand Up @@ -537,7 +550,7 @@ std::shared_ptr<HDF5Group> HDF5SharedResources::GetRootGroup()

auto poSharedResources = m_poSelf.lock();
CPLAssert(poSharedResources != nullptr);
return std::make_shared<HDF5Group>(
return HDF5Group::Create(
std::string(), "/", poSharedResources,
std::set<std::pair<unsigned long, unsigned long>>(), hGroup,
oStatbuf.objno);
Expand Down Expand Up @@ -778,9 +791,8 @@ std::shared_ptr<GDALGroup> HDF5Group::OpenGroup(const std::string &osName,
{
return nullptr;
}
return std::make_shared<HDF5Group>(GetFullName(), osName, m_poShared,
m_oSetParentIds, hSubGroup,
oStatbuf.objno);
return HDF5Group::Create(GetFullName(), osName, m_poShared, m_oSetParentIds,
hSubGroup, oStatbuf.objno);
}

/************************************************************************/
Expand Down
17 changes: 10 additions & 7 deletions frmts/mem/memdataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1452,8 +1452,9 @@ std::shared_ptr<GDALGroup> MEMGroup::OpenGroup(const std::string &osName,
std::shared_ptr<MEMGroup> MEMGroup::Create(const std::string &osParentName,
const char *pszName)
{
auto newGroup(std::make_shared<MEMGroup>(osParentName, pszName));
newGroup->m_pSelf = newGroup;
auto newGroup(
std::shared_ptr<MEMGroup>(new MEMGroup(osParentName, pszName)));
newGroup->SetSelf(newGroup);
return newGroup;
}

Expand All @@ -1479,7 +1480,7 @@ std::shared_ptr<GDALGroup> MEMGroup::CreateGroup(const std::string &osName,
return nullptr;
}
auto newGroup = MEMGroup::Create(GetFullName(), osName.c_str());
newGroup->m_pParent = m_pSelf;
newGroup->m_pParent = std::dynamic_pointer_cast<MEMGroup>(m_pSelf.lock());
m_oMapGroups[osName] = newGroup;
return newGroup;
}
Expand Down Expand Up @@ -1717,8 +1718,9 @@ MEMGroup::CreateAttribute(const std::string &osName,
"An attribute with same name already exists");
return nullptr;
}
auto newAttr(
MEMAttribute::Create(m_pSelf.lock(), osName, anDimensions, oDataType));
auto newAttr(MEMAttribute::Create(
std::dynamic_pointer_cast<MEMGroup>(m_pSelf.lock()), osName,
anDimensions, oDataType));
if (!newAttr)
return nullptr;
m_oMapAttributes[osName] = newAttr;
Expand Down Expand Up @@ -3049,8 +3051,9 @@ MEMGroup::CreateDimension(const std::string &osName, const std::string &osType,
"A dimension with same name already exists");
return nullptr;
}
auto newDim(MEMDimension::Create(m_pSelf.lock(), osName, osType,
osDirection, nSize));
auto newDim(MEMDimension::Create(
std::dynamic_pointer_cast<MEMGroup>(m_pSelf.lock()), osName, osType,
osDirection, nSize));
m_oMapDimensions[osName] = newDim;
return newDim;
}
Expand Down
3 changes: 1 addition & 2 deletions frmts/mem/memmultidim.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ class CPL_DLL MEMGroup CPL_NON_FINAL : public GDALGroup,
std::map<CPLString, std::shared_ptr<GDALGroup>> m_oMapGroups{};
std::map<CPLString, std::shared_ptr<GDALMDArray>> m_oMapMDArrays{};
std::map<CPLString, std::shared_ptr<GDALDimension>> m_oMapDimensions{};
std::weak_ptr<MEMGroup> m_pSelf{};
std::weak_ptr<MEMGroup> m_pParent{};

protected:
Expand All @@ -82,14 +81,14 @@ class CPL_DLL MEMGroup CPL_NON_FINAL : public GDALGroup,

void NotifyChildrenOfDeletion() override;

public:
MEMGroup(const std::string &osParentName, const char *pszName)
: GDALGroup(osParentName, pszName ? pszName : "")
{
if (!osParentName.empty() && !pszName)
m_osFullName = osParentName;
}

public:
static std::shared_ptr<MEMGroup> Create(const std::string &osParentName,
const char *pszName);

Expand Down
Loading

0 comments on commit daeece5

Please sign in to comment.