Support for variable length string arrays #25

Merged
merged 6 commits on Nov 14, 2023
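This change adds DataType.STRING to the HDF5 backend by mapping N5 string blocks onto JHDF5's variable-length string MDArray API. The sketch below is a minimal, illustrative round trip through the public N5 interface assuming this PR's additions; the file path, dataset name, dimensions, block size, and values are made up, and the calls follow the signatures visible in the diff (createDataset with DataType.STRING, StringDataBlock, writeBlock, readBlock).

import org.janelia.saalfeldlab.n5.DataType;
import org.janelia.saalfeldlab.n5.DatasetAttributes;
import org.janelia.saalfeldlab.n5.RawCompression;
import org.janelia.saalfeldlab.n5.StringDataBlock;
import org.janelia.saalfeldlab.n5.hdf5.N5HDF5Writer;

public class StringRoundTripSketch {

	public static void main(final String[] args) throws Exception {

		// hypothetical HDF5 file and dataset name, for illustration only
		try (final N5HDF5Writer n5 = new N5HDF5Writer("/tmp/strings.h5")) {

			// 1D variable-length string dataset: 6 elements, block size 3
			n5.createDataset("labels", new long[]{6}, new int[]{3}, DataType.STRING, new RawCompression());
			final DatasetAttributes attributes = n5.getDatasetAttributes("labels");

			// write the first block at grid position {0}
			final String[] values = {"a", "ab", "abc"};
			n5.writeBlock("labels", attributes, new StringDataBlock(new int[]{3}, new long[]{0}, values));

			// read it back; for DataType.STRING the reader returns a StringDataBlock
			final String[] readBack = (String[])n5.readBlock("labels", attributes, 0).getData();
			System.out.println(String.join(", ", readBack));
		}
	}
}

Note that, per the hunks below, string blocks bypass the low-level memory-type path entirely: reading delegates to reader.string().readMDArrayBlockWithOffset and writing to writer.string().writeMDArrayBlockWithOffset, which is why N5HDF5Util.memTypeId now rejects STRING instead of returning an HDF5 type id.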
15 changes: 12 additions & 3 deletions src/main/java/org/janelia/saalfeldlab/n5/hdf5/N5HDF5Reader.java
@@ -25,6 +25,7 @@
*/
package org.janelia.saalfeldlab.n5.hdf5;

import ch.systemsx.cisd.base.mdarray.MDArray;
import ch.systemsx.cisd.hdf5.HDF5DataSetInformation;
import ch.systemsx.cisd.hdf5.HDF5DataTypeInformation;
import ch.systemsx.cisd.hdf5.HDF5Factory;
@@ -35,7 +36,6 @@
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.JsonSyntaxException;
import com.google.gson.reflect.TypeToken;
import hdf.hdf5lib.exceptions.HDF5Exception;
import org.janelia.saalfeldlab.n5.Compression;
import org.janelia.saalfeldlab.n5.Compression.CompressionType;
@@ -50,6 +50,7 @@
import org.janelia.saalfeldlab.n5.N5Reader;
import org.janelia.saalfeldlab.n5.N5URI;
import org.janelia.saalfeldlab.n5.RawCompression;
import org.janelia.saalfeldlab.n5.StringDataBlock;
import org.janelia.saalfeldlab.n5.hdf5.N5HDF5Util.OpenDataSetCache;
import org.janelia.saalfeldlab.n5.hdf5.N5HDF5Util.OpenDataSetCache.OpenDataSet;
import org.scijava.util.VersionUtils;
@@ -61,6 +62,7 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.FileSystems;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -178,11 +180,10 @@ public N5HDF5Reader(
* @param defaultBlockSize for all dimensions > defaultBlockSize.length, and for all
* dimensions with defaultBlockSize[i] <= 0, the size of the
* dataset will be used
* @throws IOException the exception
*/
public N5HDF5Reader(
final IHDF5Reader reader,
final int... defaultBlockSize) throws IOException {
final int... defaultBlockSize) {

this(reader, false, defaultBlockSize);
}
@@ -562,6 +563,8 @@ protected static DataType getDataType(final HDF5DataSetInformation datasetInfo)
return DataType.FLOAT64;
else if (type.isAssignableFrom(float.class))
return DataType.FLOAT32;
else if (type.isAssignableFrom(String.class))
return DataType.STRING;

System.err.println("Datasets of type " + typeInfo + " not yet implemented.");
return null;
@@ -678,6 +681,12 @@ public DataBlock<?> readBlock(
final long[] hdf5CroppedBlockSize = reorderToLong(croppedBlockSize);
reorder(hdf5Offset);

if (datasetAttributes.getDataType() == DataType.STRING) {
final int[] intHdf5CroppedBlockSize = Arrays.stream(hdf5CroppedBlockSize).mapToInt(i -> (int)i).toArray();
MDArray<String> data = reader.string().readMDArrayBlockWithOffset(normalizedPathName, intHdf5CroppedBlockSize, hdf5Offset);
return new StringDataBlock(croppedBlockSize, gridPosition, data.getAsFlatArray());
}

final DataType dataType = datasetAttributes.getDataType();
final long memTypeId;
try {
2 changes: 2 additions & 0 deletions src/main/java/org/janelia/saalfeldlab/n5/hdf5/N5HDF5Util.java
@@ -55,6 +55,8 @@ static long memTypeId(final DataType dataType) {
return H5T_NATIVE_FLOAT;
case FLOAT64:
return H5T_NATIVE_DOUBLE;
case STRING:
throw new IllegalStateException("MemTypeId for STRING is not defined and should not be queried.");
default:
throw new IllegalArgumentException();
}
16 changes: 15 additions & 1 deletion src/main/java/org/janelia/saalfeldlab/n5/hdf5/N5HDF5Writer.java
@@ -25,6 +25,7 @@
*/
package org.janelia.saalfeldlab.n5.hdf5;

import ch.systemsx.cisd.base.mdarray.MDArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;

@@ -50,6 +51,7 @@

import ch.systemsx.cisd.hdf5.HDF5Factory;
import ch.systemsx.cisd.hdf5.HDF5FloatStorageFeatures;
import ch.systemsx.cisd.hdf5.HDF5GenericStorageFeatures;
import ch.systemsx.cisd.hdf5.HDF5IntStorageFeatures;
import ch.systemsx.cisd.hdf5.IHDF5Writer;
import org.janelia.saalfeldlab.n5.hdf5.N5HDF5Util.OpenDataSetCache.OpenDataSet;
@@ -220,14 +222,17 @@ public void createDataset(
final HDF5IntStorageFeatures intCompression;
final HDF5IntStorageFeatures uintCompression;
final HDF5FloatStorageFeatures floatCompression;
final HDF5GenericStorageFeatures stringCompression;
if (compression instanceof RawCompression) {
floatCompression = HDF5FloatStorageFeatures.FLOAT_NO_COMPRESSION;
intCompression = HDF5IntStorageFeatures.INT_NO_COMPRESSION;
uintCompression = HDF5IntStorageFeatures.INT_NO_COMPRESSION_UNSIGNED;
stringCompression = HDF5GenericStorageFeatures.GENERIC_NO_COMPRESSION;
} else {
floatCompression = HDF5FloatStorageFeatures.FLOAT_SHUFFLE_DEFLATE;
intCompression = HDF5IntStorageFeatures.INT_AUTO_SCALING_DEFLATE;
uintCompression = HDF5IntStorageFeatures.INT_AUTO_SCALING_DEFLATE_UNSIGNED;
stringCompression = HDF5GenericStorageFeatures.GENERIC_DEFLATE;
}

if (writer.exists(pathName))
@@ -268,6 +273,9 @@ public void createDataset(
break;
case FLOAT64:
writer.float64().createMDArray(pathName, hdf5Dimensions, hdf5BlockSize, floatCompression);
break;
case STRING:
writer.string().createMDArrayVL(pathName, hdf5Dimensions, hdf5BlockSize, stringCompression);
default:
return;
}
@@ -312,7 +320,7 @@ public <T> void setAttribute(
final String[] attributePathTokens = normalizedKey.split("/");
final boolean isPath =
attributePathTokens.length > 2
|| attributePathTokens.length > 1 && attributePathTokens[0].length() > 0
|| attributePathTokens.length > 1 && !attributePathTokens[0].isEmpty()
|| N5URI.ARRAY_INDEX.asPredicate().test(normalizedKey)
|| containsEscapeCharacters(normalizedKey);
if (isRoot || isPath ) {
@@ -515,6 +523,12 @@ public <T> void writeBlock(
final long[] hdf5DataBlockSize = reorderToLong(dataBlock.getSize());
final long[] hdf5Offset = reorderMultiplyToLong(dataBlock.getGridPosition(), datasetAttributes.getBlockSize());

if (datasetAttributes.getDataType() == DataType.STRING) {
MDArray<String> arr = new MDArray<>((String[]) dataBlock.getData(), hdf5DataBlockSize);
writer.string().writeMDArrayBlockWithOffset(pathName, arr, hdf5Offset);
return;
}

try (OpenDataSet dataset = openDataSetCache.get(pathName)) {
final long memorySpaceId = H5Screate_simple(hdf5DataBlockSize.length, hdf5DataBlockSize, null);
final long fileSpaceId = H5Dget_space(dataset.dataSetId);
53 changes: 27 additions & 26 deletions src/test/java/org/janelia/saalfeldlab/n5/hdf5/N5HDF5Test.java
@@ -20,6 +20,7 @@
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;

@@ -72,7 +73,7 @@
*/
public class N5HDF5Test extends AbstractN5Test {

private static int[] defaultBlockSize = new int[]{5, 6, 7};
private static final int[] defaultBlockSize = new int[]{5, 6, 7};
public static class Structured {

public String name = "";
@@ -107,7 +108,7 @@ protected Compression[] getCompressions() {
return Files.createTempFile("n5-hdf5-test-", ".hdf5").toFile().getCanonicalPath();
}

@Override protected N5HDF5Writer createN5Writer() throws IOException, URISyntaxException {
@Override protected N5HDF5Writer createN5Writer() throws IOException {

final String location = tempN5Location();
final String hdf5Path = resolveTestHdf5Path(location);
@@ -193,7 +194,7 @@ else if (dataBlock instanceof DoubleArrayDataBlock)

@Override
@Test
public void testVersion() throws NumberFormatException, IOException, URISyntaxException {
public void testVersion() throws NumberFormatException, IOException {

try (N5Writer n5 = createN5Writer()) {

@@ -214,7 +215,7 @@

@Override
@Test
public void testSetAttributeDoesntCreateGroup() throws IOException, URISyntaxException {
public void testSetAttributeDoesntCreateGroup() throws IOException {

try (final N5Writer writer = createN5Writer()) {
final String testGroup = "/group/should/not/exit";
@@ -225,7 +226,7 @@ public void testSetAttributeDoesntCreateGroup() throws IOException, URISyntaxExc
}

@Test
public void testOverrideBlockSize() throws IOException, URISyntaxException {
public void testOverrideBlockSize() throws IOException {

try (N5Writer n5HDF5Writer = createN5Writer()) {
final String testFilePath = n5HDF5Writer.getURI().getPath();
@@ -246,7 +247,7 @@ public void testOverrideBlockSize() throws IOException, URISyntaxException {
}

@Test
public void testDefaultBlockSizeGetter() throws IOException, URISyntaxException {
public void testDefaultBlockSizeGetter() throws IOException {
// do not pass array
{
try (final N5HDF5Writer h5 = createN5Writer()) {
@@ -263,7 +264,7 @@ public void testDefaultBlockSizeGetter() throws IOException, URISyntaxException
}

@Test
public void testOverrideBlockSizeGetter() throws IOException, URISyntaxException {
public void testOverrideBlockSizeGetter() throws IOException {
// default behavior
try (final N5HDF5Writer h5 = createN5Writer()) {
final String testFilePath = h5.getURI().getPath();
@@ -280,7 +281,7 @@ public void testOverrideBlockSizeGetter() throws IOException, URISyntaxException
}

@Test
public void testFilenameGetter() throws IOException, URISyntaxException {
public void testFilenameGetter() throws IOException {

try (final N5HDF5Writer h5 = createN5Writer()) {
final String testFilePath = h5.getURI().getPath();
@@ -290,7 +291,7 @@ public void testFilenameGetter() throws IOException, URISyntaxException {
}

@Test
public void testStructuredAttributes() throws IOException, URISyntaxException {
public void testStructuredAttributes() throws IOException {

try (N5Writer n5 = createN5Writer()) {
final Structured attribute = new Structured();
@@ -399,7 +400,7 @@ public void testType() {
/*
* Differs from AbstractN5Test since an int will be read back as int, not a long
*/
public void testListAttributes() throws IOException, URISyntaxException {
public void testListAttributes() throws IOException {

try (N5Writer n5 = createN5Writer()) {

@@ -416,15 +417,15 @@
n5.setAttribute(datasetName2, "attr8", new Object[] {"1", 2, 3.1});

Map<String, Class<?>> attributesMap = n5.listAttributes(datasetName2);
assertTrue(attributesMap.get("attr1") == double[].class);
assertTrue(attributesMap.get("attr2") == String[].class);
assertTrue(attributesMap.get("attr3") == double.class);
assertTrue(attributesMap.get("attr4") == String.class);
assertTrue(attributesMap.get("attr5") == long[].class);
assertSame(attributesMap.get("attr1"), double[].class);
assertSame(attributesMap.get("attr2"), String[].class);
assertSame(attributesMap.get("attr3"), double.class);
assertSame(attributesMap.get("attr4"), String.class);
assertSame(attributesMap.get("attr5"), long[].class);
//HDF5 will parse an int as an int rather than a long
assertTrue(attributesMap.get("attr6") == int.class);
assertTrue(attributesMap.get("attr7") == double[].class);
assertTrue(attributesMap.get("attr8") == Object[].class);
assertSame(attributesMap.get("attr6"), int.class);
assertSame(attributesMap.get("attr7"), double[].class);
assertSame(attributesMap.get("attr8"), Object[].class);

n5.createGroup(groupName2);
n5.setAttribute(groupName2, "attr1", new double[] {1.1, 2.1, 3.1});
@@ -437,15 +438,15 @@
n5.setAttribute(groupName2, "attr8", new Object[] {"1", 2, 3.1});

attributesMap = n5.listAttributes(groupName2);
assertTrue(attributesMap.get("attr1") == double[].class);
assertTrue(attributesMap.get("attr2") == String[].class);
assertTrue(attributesMap.get("attr3") == double.class);
assertTrue(attributesMap.get("attr4") == String.class);
assertTrue(attributesMap.get("attr5") == long[].class);
assertSame(attributesMap.get("attr1"), double[].class);
assertSame(attributesMap.get("attr2"), String[].class);
assertSame(attributesMap.get("attr3"), double.class);
assertSame(attributesMap.get("attr4"), String.class);
assertSame(attributesMap.get("attr5"), long[].class);
//HDF5 will parse an int as an int rather than a long
assertTrue(attributesMap.get("attr6") == int.class);
assertTrue(attributesMap.get("attr7") == double[].class);
assertTrue(attributesMap.get("attr8") == Object[].class);
assertSame(attributesMap.get("attr6"), int.class);
assertSame(attributesMap.get("attr7"), double[].class);
assertSame(attributesMap.get("attr8"), Object[].class);
}
}
}