From 39722e7019e3af5f8079c8f3d4d734dd8c866aeb Mon Sep 17 00:00:00 2001
From: Matthew Iannucci
Date: Fri, 4 Oct 2024 16:37:44 -0400
Subject: [PATCH 01/40] Save progress for next week

---
 kerchunk/combine.py                   |  8 ++++----
 kerchunk/fits.py                      |  2 +-
 kerchunk/grib2.py                     |  4 ++--
 kerchunk/hdf4.py                      |  2 +-
 kerchunk/netCDF3.py                   |  2 +-
 kerchunk/tests/test_combine.py        |  6 +++---
 kerchunk/tests/test_combine_concat.py | 20 ++++++++++----------
 kerchunk/tests/test_fits.py           | 10 +++++-----
 kerchunk/tests/test_grib.py           | 10 +++++-----
 kerchunk/tests/test_hdf.py            | 20 ++++++++++----------
 kerchunk/tests/test_tiff.py           |  4 ++--
 kerchunk/tests/test_utils.py          |  8 ++++----
 kerchunk/utils.py                     |  2 +-
 pyproject.toml                        |  2 +-
 14 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/kerchunk/combine.py b/kerchunk/combine.py
index eb891de1..155ba4c9 100644
--- a/kerchunk/combine.py
+++ b/kerchunk/combine.py
@@ -203,7 +203,7 @@ def append(
        ds = xr.open_dataset(
            fs.get_mapper(), engine="zarr", backend_kwargs={"consolidated": False}
        )
-        z = zarr.open(fs.get_mapper())
+        z = zarr.open(fs.get_mapper(), zarr_version=2)
        mzz = MultiZarrToZarr(
            path,
            out=fs.references,  # dict or parquet/lazy
@@ -360,7 +360,7 @@ def first_pass(self):
                fs._dircache_from_items()

            logger.debug("First pass: %s", i)
-            z = zarr.open_group(fs.get_mapper(""))
+            z = zarr.open_group(fs.get_mapper(""), zarr_version=2)
            for var in self.concat_dims:
                value = self._get_value(i, z, var, fn=self._paths[i])
                if isinstance(value, np.ndarray):
@@ -387,7 +387,7 @@ def store_coords(self):
        """
        kv = {}
        store = zarr.storage.KVStore(kv)
-        group = zarr.open(store)
+        group = zarr.open(store, zarr_version=2)
        m = self.fss[0].get_mapper("")
        z = zarr.open(m)
        for k, v in self.coos.items():
@@ -461,7 +461,7 @@ def second_pass(self):
        for i, fs in enumerate(self.fss):
            to_download = {}
            m = fs.get_mapper("")
-            z = zarr.open(m)
+            z = zarr.open(m, zarr_version=2)

            if no_deps is None:
                # done first time only
diff --git a/kerchunk/fits.py b/kerchunk/fits.py
index 18729a9b..f714af97 100644
--- a/kerchunk/fits.py
+++ b/kerchunk/fits.py
@@ -72,7 +72,7 @@ def process_file(
    storage_options = storage_options or {}
    out = out or {}
-    g = zarr.open(out)
+    g = zarr.open(out, zarr_version=2)

    with fsspec.open(url, mode="rb", **storage_options) as f:
        infile = fits.open(f, do_not_scale_image_data=True)
diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py
index f105fe8b..06108db5 100644
--- a/kerchunk/grib2.py
+++ b/kerchunk/grib2.py
@@ -191,7 +191,7 @@ def scan_grib(
            if good is False:
                continue

-            z = zarr.open_group(store)
+            z = zarr.open_group(store, zarr_version=2)
            global_attrs = {
                f"GRIB_{k}": m[k]
                for k in cfgrib.dataset.GLOBAL_ATTRIBUTES_KEYS
@@ -398,7 +398,7 @@ def grib_tree(
    # TODO allow passing a LazyReferenceMapper as output?
    zarr_store = {}
-    zroot = zarr.open_group(store=zarr_store)
+    zroot = zarr.open_group(store=zarr_store, zarr_version=2)

    aggregations: Dict[str, List] = defaultdict(list)
    aggregation_dims: Dict[str, Set] = defaultdict(set)
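The change repeated across this first patch is uniform: every `zarr.open`/`zarr.open_group` call gains an explicit `zarr_version=2` pin, because kerchunk references carry zarr v2 metadata (`.zarray`/`.zattrs` keys) that the zarr-python 3.x alphas no longer assume by default. A minimal sketch of the reading-side pattern being protected, assuming any kerchunk reference dict:

    import fsspec
    import zarr

    # a minimal but valid kerchunk reference set (placeholder content)
    refs = {"version": 1, "refs": {".zgroup": '{"zarr_format": 2}'}}
    fs = fsspec.filesystem("reference", fo=refs)
    # Without the pin, zarr-python 3.x would look for v3 zarr.json metadata
    # instead of the v2 .zarray/.zattrs keys that kerchunk writes.
    z = zarr.open_group(fs.get_mapper(""), zarr_version=2)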
diff --git a/kerchunk/hdf4.py b/kerchunk/hdf4.py
index 483ffba7..4235d139 100644
--- a/kerchunk/hdf4.py
+++ b/kerchunk/hdf4.py
@@ -144,7 +144,7 @@ def translate(self, filename=None, storage_options=None):
            remote_protocol=prot,
            remote_options=self.st,
        )
-        g = zarr.open_group("reference://", storage_options=dict(fs=fs))
+        g = zarr.open_group("reference://", storage_options=dict(fs=fs), zarr_version=2)
        refs = {}
        for k, v in output.items():
            if isinstance(v, dict):
diff --git a/kerchunk/netCDF3.py b/kerchunk/netCDF3.py
index d43b6b97..8e0994ca 100644
--- a/kerchunk/netCDF3.py
+++ b/kerchunk/netCDF3.py
@@ -167,7 +167,7 @@ def translate(self):
        import zarr

        out = self.out
-        z = zarr.open(out, mode="w")
+        z = zarr.open(out, mode="w", zarr_version=2)
        for dim, var in self.variables.items():
            if dim in self.chunks:
                shape = self.chunks[dim][-1]
diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py
index 13994921..1b5713b2 100644
--- a/kerchunk/tests/test_combine.py
+++ b/kerchunk/tests/test_combine.py
@@ -133,14 +133,14 @@

 # simple time arrays - xarray can't make these!
 m = fs.get_mapper("time1.zarr")
-z = zarr.open(m, mode="w")
+z = zarr.open(m, mode="w", zarr_version=2)
 ar = z.create_dataset("time", data=np.array([1], dtype="M8[s]"))
 ar.attrs.update({"_ARRAY_DIMENSIONS": ["time"]})
 ar = z.create_dataset("data", data=arr)
 ar.attrs.update({"_ARRAY_DIMENSIONS": ["time", "x", "y"]})

 m = fs.get_mapper("time2.zarr")
-z = zarr.open(m, mode="w")
+z = zarr.open(m, mode="w", zarr_version=2)
 ar = z.create_dataset("time", data=np.array([2], dtype="M8[s]"))
 ar.attrs.update({"_ARRAY_DIMENSIONS": ["time"]})
 ar = z.create_dataset("data", data=arr)
@@ -272,7 +272,7 @@ def test_get_coos(refs, selector, expected):
    mzz.first_pass()
    assert mzz.coos["time"].tolist() == expected
    mzz.store_coords()
-    g = zarr.open(mzz.out)
+    g = zarr.open(mzz.out, zarr_version=2)
    assert g["time"][:].tolist() == expected
    assert dict(g.attrs)
diff --git a/kerchunk/tests/test_combine_concat.py b/kerchunk/tests/test_combine_concat.py
index 3f7ff823..f51f10e8 100644
--- a/kerchunk/tests/test_combine_concat.py
+++ b/kerchunk/tests/test_combine_concat.py
@@ -51,7 +51,7 @@ def test_success(tmpdir, arrays, chunks, axis, m):
    refs = []
    for i, x in enumerate(arrays):
        fn = f"{tmpdir}/out{i}.zarr"
-        g = zarr.open(fn)
+        g = zarr.open(fn, zarr_version=2)
        g.create_dataset("x", data=x, chunks=chunks)
        fns.append(fn)
        ref = kerchunk.zarr.single_zarr(fn, inline=0)
@@ -62,7 +62,7 @@ def test_success(tmpdir, arrays, chunks, axis, m):
    )

    mapper = fsspec.get_mapper("reference://", fo=out)
-    g = zarr.open(mapper)
+    g = zarr.open(mapper, zarr_version=2)
    assert (g.x[:] == np.concatenate(arrays, axis=axis)).all()

    try:
@@ -76,7 +76,7 @@ def test_success(tmpdir, arrays, chunks, axis, m):
        remote_protocol="file",
        skip_instance_cache=True,
    )
-    g = zarr.open(mapper)
+    g = zarr.open(mapper, zarr_version=2)
    assert (g.x[:] == np.concatenate(arrays, axis=axis)).all()

    kerchunk.df.refs_to_dataframe(out, "memory://out.parq", record_size=1)
@@ -86,7 +86,7 @@ def test_success(tmpdir, arrays, chunks, axis, m):
        remote_protocol="file",
        skip_instance_cache=True,
    )
-    g = zarr.open(mapper)
+    g = zarr.open(mapper, zarr_version=2)
    assert (g.x[:] == np.concatenate(arrays, axis=axis)).all()

@@ -95,9 +95,9 @@ def test_fail_chunks(tmpdir):
    fn2 = f"{tmpdir}/out2.zarr"
    x1 = np.arange(10)
    x2 = np.arange(10, 20)
-    g = zarr.open(fn1)
+    g = zarr.open(fn1, zarr_version=2)
    g.create_dataset("x", data=x1, chunks=(2,))
-    g = zarr.open(fn2)
+    g = zarr.open(fn2, zarr_version=2)
    g.create_dataset("x", data=x2, chunks=(3,))

    ref1 = kerchunk.zarr.single_zarr(fn1, inline=0)
@@ -112,9 +112,9 @@ def test_fail_shape(tmpdir):
    fn2 = f"{tmpdir}/out2.zarr"
    x1 = np.arange(12).reshape(6, 2)
    x2 = np.arange(12, 24)
-    g = zarr.open(fn1)
+    g = zarr.open(fn1, zarr_version=2)
    g.create_dataset("x", data=x1, chunks=(2,))
-    g = zarr.open(fn2)
+    g = zarr.open(fn2, zarr_version=2)
    g.create_dataset("x", data=x2, chunks=(2,))

    ref1 = kerchunk.zarr.single_zarr(fn1, inline=0)
@@ -129,9 +129,9 @@ def test_fail_irregular_chunk_boundaries(tmpdir):
    fn2 = f"{tmpdir}/out2.zarr"
    x1 = np.arange(10)
    x2 = np.arange(10, 24)
-    g = zarr.open(fn1)
+    g = zarr.open(fn1, zarr_version=2)
    g.create_dataset("x", data=x1, chunks=(4,))
-    g = zarr.open(fn2)
+    g = zarr.open(fn2, zarr_version=2)
    g.create_dataset("x", data=x2, chunks=(4,))

    ref1 = kerchunk.zarr.single_zarr(fn1, inline=0)
diff --git a/kerchunk/tests/test_fits.py b/kerchunk/tests/test_fits.py
index 14ea6fc0..e7211479 100644
--- a/kerchunk/tests/test_fits.py
+++ b/kerchunk/tests/test_fits.py
@@ -18,7 +18,7 @@ def test_ascii_table():
    url = "https://fits.gsfc.nasa.gov/samples/WFPC2u5780205r_c0fx.fits"
    out = kerchunk.fits.process_file(url, extension=1)
    m = fsspec.get_mapper("reference://", fo=out, remote_protocol="https")
-    g = zarr.open(m)
+    g = zarr.open(m, zarr_version=2)
    arr = g["u5780205r_cvt.c0h.tab"][:]
    with fsspec.open(
        "https://fits.gsfc.nasa.gov/samples/WFPC2u5780205r_c0fx.fits"
@@ -31,7 +31,7 @@ def test_binary_table():
    out = kerchunk.fits.process_file(btable, extension=1)
    m = fsspec.get_mapper("reference://", fo=out)
-    z = zarr.open(m)
+    z = zarr.open(m, zarr_version=2)
    arr = z["1"]
    with open(btable, "rb") as f:
        hdul = fits.open(f)
@@ -48,7 +48,7 @@ def test_cube():
    out = kerchunk.fits.process_file(range_im)
    m = fsspec.get_mapper("reference://", fo=out)
-    z = zarr.open(m)
+    z = zarr.open(m, zarr_version=2)
    arr = z["PRIMARY"]
    with open(range_im, "rb") as f:
        hdul = fits.open(f)
@@ -61,7 +61,7 @@ def test_with_class():
    out = ftz.translate()
    assert "fits" in repr(ftz)
    m = fsspec.get_mapper("reference://", fo=out)
-    z = zarr.open(m)
+    z = zarr.open(m, zarr_version=2)
    arr = z["PRIMARY"]
    with open(range_im, "rb") as f:
        hdul = fits.open(f)
@@ -76,7 +76,7 @@ def test_var():
    ftz = kerchunk.fits.FitsToZarr(var)
    out = ftz.translate()
    m = fsspec.get_mapper("reference://", fo=out)
-    z = zarr.open(m)
+    z = zarr.open(m, zarr_version=2)
    arr = z["1"]
    vars = [_.tolist() for _ in arr["var"]]
diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py
index 32092ced..91ae9ac7 100644
--- a/kerchunk/tests/test_grib.py
+++ b/kerchunk/tests/test_grib.py
@@ -119,7 +119,7 @@ def test_grib_tree():
    corrected_msg_groups = [correct_hrrr_subhf_step(msg) for msg in scanned_msg_groups]
    result = grib_tree(corrected_msg_groups)
    fs = fsspec.filesystem("reference", fo=result)
-    zg = zarr.open_group(fs.get_mapper(""))
+    zg = zarr.open_group(fs.get_mapper(""), zarr_version=2)
    assert isinstance(zg["refc/instant/atmosphere/refc"], zarr.Array)
    assert isinstance(zg["vbdsf/avg/surface/vbdsf"], zarr.Array)
    assert set(zg["vbdsf/avg/surface"].attrs["coordinates"].split()) == set(
@@ -147,14 +147,14 @@ def test_correct_hrrr_subhf_group_step():
        scanned_msgs = ujson.load(fobj)

    original_zg = [
-        zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""))
+        zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""), zarr_version=2)
        for val in scanned_msgs
    ]

    corrected_msgs = [correct_hrrr_subhf_step(msg) for msg in scanned_msgs]

    corrected_zg = [
-        zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""))
+        zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""), zarr_version=2)
        for val in corrected_msgs
    ]
@@ -177,7 +177,7 @@ def test_hrrr_subhf_corrected_grib_tree():
    corrected_msgs = [correct_hrrr_subhf_step(msg) for msg in scanned_msgs]
    merged = grib_tree(corrected_msgs)
-    zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""))
+    zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""), zarr_version=2)
    # Check the values and shape of the time coordinates
    assert zg.u.instant.heightAboveGround.step[:].tolist() == [
        0.0,
@@ -220,7 +220,7 @@ def test_hrrr_sfcf_grib_tree():
    with open(fpath, "rb") as fobj:
        scanned_msgs = ujson.load(fobj)
    merged = grib_tree(scanned_msgs)
-    zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""))
+    zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""), zarr_version=2)
    # Check the heightAboveGround level shape of the time coordinates
    assert zg.u.instant.heightAboveGround.heightAboveGround[()] == 80.0
    assert zg.u.instant.heightAboveGround.heightAboveGround.shape == ()
diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py
index 69fd22b5..2f825e6d 100644
--- a/kerchunk/tests/test_hdf.py
+++ b/kerchunk/tests/test_hdf.py
@@ -193,7 +193,7 @@ def test_string_embed():
    out = h.translate()
    fs = fsspec.filesystem("reference", fo=out)
    assert txt in fs.references["vlen_str/0"]
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)
    assert z.vlen_str.dtype == "O"
    assert z.vlen_str[0] == txt
    assert (z.vlen_str[1:] == "").all()
@@ -204,7 +204,7 @@ def test_string_null():
    h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="null", inline_threshold=0)
    out = h.translate()
    fs = fsspec.filesystem("reference", fo=out)
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)
    assert z.vlen_str.dtype == "O"
    assert (z.vlen_str[:] == None).all()
@@ -217,7 +217,7 @@ def test_string_leave():
    )
    out = h.translate()
    fs = fsspec.filesystem("reference", fo=out)
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)
    assert z.vlen_str.dtype == "S16"
    assert z.vlen_str[0]  # some obscured ID
    assert (z.vlen_str[1:] == b"").all()
@@ -232,7 +232,7 @@ def test_string_decode():
    out = h.translate()
    fs = fsspec.filesystem("reference", fo=out)
    assert txt in fs.cat("vlen_str/.zarray").decode()  # stored in filter def
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)
    assert z.vlen_str[0] == txt
    assert (z.vlen_str[1:] == "").all()
@@ -243,7 +243,7 @@ def test_compound_string_null():
    h = kerchunk.hdf.SingleHdf5ToZarr(f, fn, vlen_encode="null", inline_threshold=0)
    out = h.translate()
    fs = fsspec.filesystem("reference", fo=out)
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)
    assert z.vlen_str[0].tolist() == (10, None)
    assert (z.vlen_str["ints"][1:] == 0).all()
    assert (z.vlen_str["strs"][1:] == None).all()
@@ -257,7 +257,7 @@ def test_compound_string_leave():
    )
    out = h.translate()
    fs = fsspec.filesystem("reference", fo=out)
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)
    assert z.vlen_str["ints"][0] == 10
    assert z.vlen_str["strs"][0]  # random ID
    assert (z.vlen_str["ints"][1:] == 0).all()
@@ -272,7 +272,7 @@ def test_compound_string_encode():
    )
    out = h.translate()
    fs = fsspec.filesystem("reference", fo=out)
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)
    assert z.vlen_str["ints"][0] == 10
    assert z.vlen_str["strs"][0] == "water"
    assert (z.vlen_str["ints"][1:] == 0).all()
@@ -303,7 +303,7 @@ def test_compress():
            continue
        out = h.translate()
        m = fsspec.get_mapper("reference://", fo=out)
-        g = zarr.open(m)
+        g = zarr.open(m, zarr_version=2)
        assert np.mean(g.data) == 49.5
@@ -313,7 +313,7 @@ def test_embed():
    out = h.translate()

    fs = fsspec.filesystem("reference", fo=out)
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)
    data = z["Domain_10"]["STER"]["min_1"]["boom_1"]["temperature"][:]
    assert data[0].tolist() == [
        "2014-04-01 00:00:00.0",
@@ -348,7 +348,7 @@ def test_translate_links():
        preserve_linked_dsets=True
    )
    fs = fsspec.filesystem("reference", fo=out)
-    z = zarr.open(fs.get_mapper())
+    z = zarr.open(fs.get_mapper(), zarr_version=2)

    # 1. Test the hard linked datasets were translated correctly
    # 2. Test the soft linked datasets were translated correctly
diff --git a/kerchunk/tests/test_tiff.py b/kerchunk/tests/test_tiff.py
index 3cc52471..4011a67a 100644
--- a/kerchunk/tests/test_tiff.py
+++ b/kerchunk/tests/test_tiff.py
@@ -16,7 +16,7 @@ def test_one():
    fn = files[0]
    out = kerchunk.tiff.tiff_to_zarr(fn)
    m = fsspec.get_mapper("reference://", fo=out)
-    z = zarr.open(m)
+    z = zarr.open(m, zarr_version=2)
    assert list(z) == ["0", "1", "2"]
    assert z.attrs["multiscales"] == [
        {
@@ -34,7 +34,7 @@ def test_coord():
    fn = files[0]
    out = kerchunk.tiff.tiff_to_zarr(fn)
    m = fsspec.get_mapper("reference://", fo=out)
-    z = zarr.open(m)  # highest res is the one xarray picks
+    z = zarr.open(m, zarr_version=2)  # highest res is the one xarray picks
    out = kerchunk.tiff.generate_coords(z.attrs, z[0].shape)

    ds = xr.open_dataset(fn)
diff --git a/kerchunk/tests/test_utils.py b/kerchunk/tests/test_utils.py
index a1bb094d..8e4502c1 100644
--- a/kerchunk/tests/test_utils.py
+++ b/kerchunk/tests/test_utils.py
@@ -79,13 +79,13 @@ def test_inline_array():
    assert "data/1" not in out2
    assert json.loads(out2["data/.zattrs"]) == json.loads(refs["data/.zattrs"])
    fs = fsspec.filesystem("reference", fo=out2)
-    g = zarr.open(fs.get_mapper())
+    g = zarr.open(fs.get_mapper(), zarr_version=2)
    assert g.data[:].tolist() == [1, 2]

    out3 = kerchunk.utils.inline_array(refs, threshold=1000)  # inlines because of size
    assert "data/1" not in out3
    fs = fsspec.filesystem("reference", fo=out3)
-    g = zarr.open(fs.get_mapper())
+    g = zarr.open(fs.get_mapper(), zarr_version=2)
    assert g.data[:].tolist() == [1, 2]

@@ -99,7 +99,7 @@ def test_json():
@pytest.mark.parametrize("chunks", [[10, 10], [5, 10]])
def test_subchunk_exact(m, chunks):
    store = m.get_mapper("test.zarr")
-    g = zarr.open_group(store, mode="w")
+    g = zarr.open_group(store, mode="w", zarr_version=2)
    data = np.arange(100).reshape(10, 10)
    arr = g.create_dataset("data", data=data, chunks=chunks, compression=None)
    ref = kerchunk.zarr.single_zarr("memory://test.zarr")["refs"]
@@ -114,7 +114,7 @@ def test_subchunk_exact(m, chunks):
    ]

    g2 = zarr.open_group(
-        "reference://", storage_options={"fo": out, "remote_protocol": "memory"}
+        "reference://", storage_options={"fo": out, "remote_protocol": "memory"}, zarr_version=2
    )
    assert (g2.data[:] == data).all()
diff --git a/kerchunk/utils.py b/kerchunk/utils.py
index 838c3cb1..4049ee63 100644
--- a/kerchunk/utils.py
+++ b/kerchunk/utils.py
@@ -226,7 +226,7 @@ def inline_array(store, threshold=1000, names=None, remote_options=None):
    fs = fsspec.filesystem(
        "reference", fo=store, **(remote_options or {}), skip_instance_cache=True
    )
-    g = zarr.open_group(fs.get_mapper(), mode="r+")
+    g = zarr.open_group(fs.get_mapper(), mode="r+", zarr_version=2)
    _inline_array(g, threshold, names=names or [])
    return fs.references
diff --git a/pyproject.toml b/pyproject.toml
index 415c3cbd..680f4c2f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
    "numcodecs",
    "numpy",
    "ujson",
-    "zarr<3",
+    "zarr==3.0.0a6",
 ]

 [project.optional-dependencies]

From d3c7e372cfa6f6822361441df79e872c9b68ee4c Mon Sep 17 00:00:00 2001
From: Matthew Iannucci
Date: Sat, 5 Oct 2024 09:49:38 -0400
Subject: [PATCH 02/40] Bump zarr python version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 680f4c2f..6e57e223 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
    "numcodecs",
    "numpy",
    "ujson",
-    "zarr==3.0.0a6",
+    "zarr==3.0.0a7",
 ]

 [project.optional-dependencies]

From 25d7d14e5fb6e563012d1547013d92f28834bcec Mon Sep 17 00:00:00 2001
From: Matthew Iannucci
Date: Sat, 5 Oct 2024 09:58:35 -0400
Subject: [PATCH 03/40] Get some tests working, others failing

---
 kerchunk/hdf.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py
index 549923d4..777201b5 100644
--- a/kerchunk/hdf.py
+++ b/kerchunk/hdf.py
@@ -21,11 +21,11 @@
    "for more details."
 )

-try:
-    from zarr.meta import encode_fill_value
-except ModuleNotFoundError:
-    # https://github.com/zarr-developers/zarr-python/issues/2021
-    from zarr.v2.meta import encode_fill_value
+# try:
+#     from zarr.meta import encode_fill_value
+# except ModuleNotFoundError:
+#     # https://github.com/zarr-developers/zarr-python/issues/2021
+#     from zarr.v2.meta import encode_fill_value

 lggr = logging.getLogger("h5-to-zarr")
 _HIDDEN_ATTRS = {  # from h5netcdf.attrs
@@ -465,9 +465,10 @@ def _translator(
            if h5py.h5ds.is_scale(h5obj.id) and not cinfo:
                return
            if h5obj.attrs.get("_FillValue") is not None:
-                fill = encode_fill_value(
-                    h5obj.attrs.get("_FillValue"), dt or h5obj.dtype
-                )
+                fill = h5obj.attrs.get("_FillValue")
+                # fill = encode_fill_value(
+                #     h5obj.attrs.get("_FillValue"), dt or h5obj.dtype
+                # )

            # Create a Zarr array equivalent to this HDF5 dataset...
            za = self._zroot.require_dataset(
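PATCH 03 stubs out zarr's `encode_fill_value` because its import location disappeared in the 3.x alphas; PATCH 04 vendors a copy into kerchunk/hdf.py, and PATCH 09 later moves it to kerchunk/utils.py. For orientation, a hedged sketch of what that vendored helper returns, matching the branches of the copy shown in kerchunk/utils.py further down (assuming that module is importable):

    import base64
    import numpy as np
    from kerchunk.utils import encode_fill_value  # vendored in PATCH 09

    # float fills become JSON-safe strings
    assert encode_fill_value(float("nan"), np.dtype("f4")) == "NaN"
    assert encode_fill_value(float("inf"), np.dtype("f8")) == "Infinity"
    # integer fills become plain Python scalars
    assert encode_fill_value(np.int16(7), np.dtype("i2")) == 7
    # bytes fills are base64-encoded, e.g. b"\x00" -> "AA=="
    assert encode_fill_value(b"\x00", np.dtype("S1")) == base64.standard_b64encode(b"\x00").decode("ascii")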
From ffe5f9d906381be23b41496e167d1d44835a5486 Mon Sep 17 00:00:00 2001
From: Matthew Iannucci
Date: Tue, 8 Oct 2024 17:07:53 -0400
Subject: [PATCH 04/40] get through single hdf to zarr

---
 kerchunk/combine.py                   |  8 +--
 kerchunk/fits.py                      |  2 +-
 kerchunk/grib2.py                     |  4 +-
 kerchunk/hdf.py                       | 94 ++++++++++++++++++++++-----
 kerchunk/hdf4.py                      |  2 +-
 kerchunk/netCDF3.py                   |  2 +-
 kerchunk/tests/test_combine.py        |  6 +-
 kerchunk/tests/test_combine_concat.py | 20 +++---
 kerchunk/tests/test_fits.py           | 10 +--
 kerchunk/tests/test_grib.py           | 10 +--
 kerchunk/tests/test_hdf.py            | 23 ++++---
 kerchunk/tests/test_tiff.py           |  4 +-
 kerchunk/tests/test_utils.py          |  8 +--
 kerchunk/utils.py                     |  2 +-
 14 files changed, 129 insertions(+), 66 deletions(-)

diff --git a/kerchunk/combine.py b/kerchunk/combine.py
index 155ba4c9..b02fa395 100644
--- a/kerchunk/combine.py
+++ b/kerchunk/combine.py
@@ -203,7 +203,7 @@ def append(
        ds = xr.open_dataset(
            fs.get_mapper(), engine="zarr", backend_kwargs={"consolidated": False}
        )
-        z = zarr.open(fs.get_mapper(), zarr_version=2)
+        z = zarr.open(fs.get_mapper(), zarr_format=2)
        mzz = MultiZarrToZarr(
            path,
            out=fs.references,  # dict or parquet/lazy
@@ -360,7 +360,7 @@ def first_pass(self):
                fs._dircache_from_items()

            logger.debug("First pass: %s", i)
-            z = zarr.open_group(fs.get_mapper(""), zarr_version=2)
+            z = zarr.open_group(fs.get_mapper(""), zarr_format=2)
            for var in self.concat_dims:
                value = self._get_value(i, z, var, fn=self._paths[i])
                if isinstance(value, np.ndarray):
@@ -387,7 +387,7 @@ def store_coords(self):
        """
        kv = {}
        store = zarr.storage.KVStore(kv)
-        group = zarr.open(store, zarr_version=2)
+        group = zarr.open(store, zarr_format=2)
        m = self.fss[0].get_mapper("")
        z = zarr.open(m)
        for k, v in self.coos.items():
@@ -461,7 +461,7 @@ def second_pass(self):
        for i, fs in enumerate(self.fss):
            to_download = {}
            m = fs.get_mapper("")
-            z = zarr.open(m, zarr_version=2)
+            z = zarr.open(m, zarr_format=2)

            if no_deps is None:
                # done first time only
diff --git a/kerchunk/fits.py b/kerchunk/fits.py
index f714af97..f50bef64 100644
--- a/kerchunk/fits.py
+++ b/kerchunk/fits.py
@@ -72,7 +72,7 @@ def process_file(
    storage_options = storage_options or {}
    out = out or {}
-    g = zarr.open(out, zarr_version=2)
+    g = zarr.open(out, zarr_format=2)

    with fsspec.open(url, mode="rb", **storage_options) as f:
        infile = fits.open(f, do_not_scale_image_data=True)
diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py
index 06108db5..7d75786f 100644
--- a/kerchunk/grib2.py
+++ b/kerchunk/grib2.py
@@ -191,7 +191,7 @@ def scan_grib(
            if good is False:
                continue

-            z = zarr.open_group(store, zarr_version=2)
+            z = zarr.open_group(store, zarr_format=2)
            global_attrs = {
                f"GRIB_{k}": m[k]
                for k in cfgrib.dataset.GLOBAL_ATTRIBUTES_KEYS
@@ -398,7 +398,7 @@ def grib_tree(
    # TODO allow passing a LazyReferenceMapper as output?
    zarr_store = {}
-    zroot = zarr.open_group(store=zarr_store, zarr_version=2)
+    zroot = zarr.open_group(store=zarr_store, zarr_format=2)

    aggregations: Dict[str, List] = defaultdict(list)
    aggregation_dims: Dict[str, Set] = defaultdict(set)
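From this commit on, the pin is spelled `zarr_format=2`: after the PATCH 02 bump to the 3.0.0a7 alpha, the keyword that PATCH 01 introduced as `zarr_version` is chased through the codebase under its new name. A minimal before/after sketch, using the in-memory store construction this patch adopts in hdf.py:

    import zarr

    store = zarr.storage.MemoryStore(mode="a", store_dict={})  # 3.0.0a7-era signature
    # zarr-python 2.x / 3.0.0a6:
    #   g = zarr.open_group(store=store, zarr_version=2)
    # zarr-python >= 3.0.0a7:
    g = zarr.open_group(store=store, zarr_format=2)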
diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py
index 777201b5..4073a2b3 100644
--- a/kerchunk/hdf.py
+++ b/kerchunk/hdf.py
@@ -1,7 +1,8 @@
 import base64
 import io
 import logging
-from typing import Union, BinaryIO
+from typing import Union, BinaryIO, Any, cast
+from packaging.version import Version

 import fsspec.core
 from fsspec.implementations.reference import LazyReferenceMapper
@@ -111,8 +112,13 @@ def __init__(
        if vlen_encode not in ["embed", "null", "leave", "encode"]:
            raise NotImplementedError
        self.vlen = vlen_encode
-        self.store = out or {}
-        self._zroot = zarr.group(store=self.store, overwrite=True)
+        self.store_dict = out or {}
+        if Version(zarr.__version__) < Version("3.0.0.a0"):
+            self.store = zarr.storage.KVStore(self.store_dict)
+        else:
+            self.store = zarr.storage.MemoryStore(mode="a", store_dict=self.store_dict)
+        # self.store = out or {}
+        self._zroot = zarr.group(store=self.store, zarr_format=2, overwrite=True)

        self._uri = url
        self.error = error
@@ -141,8 +147,12 @@ def translate(self, preserve_linked_dsets=False):
        lggr.debug("Translation begins")
        self._transfer_attrs(self._h5f, self._zroot)

+        print('transfer done')
+
        self._h5f.visititems(self._translator)

+        print('visit done')
+
        if preserve_linked_dsets:
            if not has_visititems_links():
                raise RuntimeError(
@@ -157,7 +167,10 @@ def translate(self, preserve_linked_dsets=False):
            self.store.flush()
            return self.store
        else:
+            for k, v in self.store_dict.items():
+                if isinstance(v, zarr.core.buffer.cpu.Buffer):
+                    self.store_dict[k] = v.to_bytes()
-            store = _encode_for_JSON(self.store)
+            store = _encode_for_JSON(self.store_dict)
            return {"version": 1, "refs": store}

    def _unref(self, ref):
@@ -465,28 +478,36 @@ def _translator(
            if h5py.h5ds.is_scale(h5obj.id) and not cinfo:
                return
            if h5obj.attrs.get("_FillValue") is not None:
                fill = h5obj.attrs.get("_FillValue")
-                # fill = encode_fill_value(
-                #     h5obj.attrs.get("_FillValue"), dt or h5obj.dtype
-                # )
+                fill = encode_fill_value(
+                    h5obj.attrs.get("_FillValue"), dt or h5obj.dtype
+                )
+
+            adims = self._get_array_dims(h5obj)

-            # Create a Zarr array equivalent to this HDF5 dataset...
-            za = self._zroot.require_dataset(
-                h5obj.name,
+            # Create a Zarr array equivalent to this HDF5 dataset..
+            za = self._zroot.require_array(
+                name=h5obj.name,
                shape=h5obj.shape,
                dtype=dt or h5obj.dtype,
                chunks=h5obj.chunks or False,
                fill_value=fill,
-                compression=None,
+                compressor=None,
                filters=filters,
-                overwrite=True,
+                attributes={
+                    "_ARRAY_DIMENSIONS": adims,
+                },
                **kwargs,
            )
            lggr.debug(f"Created Zarr array: {za}")
-            self._transfer_attrs(h5obj, za)
-            adims = self._get_array_dims(h5obj)
-            za.attrs["_ARRAY_DIMENSIONS"] = adims
+            #self._transfer_attrs(h5obj, za)
+
+            # za.attrs["_ARRAY_DIMENSIONS"] = adims
            lggr.debug(f"_ARRAY_DIMENSIONS = {adims}")

            if "data" in kwargs:
@@ -509,9 +530,11 @@ def _translator(
                    if h5obj.fletcher32:
                        logging.info("Discarding fletcher32 checksum")
                        v["size"] -= 4
+                    key = ".".join(map(str, k))
                    if (
                        self.inline
                        and isinstance(v, dict)
                        and v["size"] < self.inline
                    ):
                        data.decode("ascii")
                    except UnicodeDecodeError:
                        data = b"base64:" + base64.b64encode(data)
-                        self.store[za._chunk_key(k)] = data
+
+                        self.store_dict[key] = data
                    else:
-                        self.store[za._chunk_key(k)] = [
+                        self.store_dict[key] = [
                            self._uri,
                            v["offset"],
                            v["size"],
                        ]
@@ -536,6 +559,7 @@ def _translator(
                self._transfer_attrs(h5obj, zgrp)
        except Exception as e:
            import traceback
+            raise e

            msg = "\n".join(
                [
@@ -682,3 +706,43 @@ def _is_netcdf_variable(dataset: h5py.Dataset):
def has_visititems_links():
    return hasattr(h5py.Group, "visititems_links")

+def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any:
+    # early out
+    if v is None:
+        return v
+    if dtype.kind == "V" and dtype.hasobject:
+        if object_codec is None:
+            raise ValueError("missing object_codec for object array")
+        v = object_codec.encode(v)
+        v = str(base64.standard_b64encode(v), "ascii")
+        return v
+    if dtype.kind == "f":
+        if np.isnan(v):
+            return "NaN"
+        elif np.isposinf(v):
+            return "Infinity"
+        elif np.isneginf(v):
+            return "-Infinity"
+        else:
+            return float(v)
+    elif dtype.kind in "ui":
+        return int(v)
+    elif dtype.kind == "b":
+        return bool(v)
+    elif dtype.kind in "c":
+        c = cast(np.complex128, np.dtype(complex).type())
+        v = (
+            encode_fill_value(v.real, c.real.dtype, object_codec),
+            encode_fill_value(v.imag, c.imag.dtype, object_codec),
+        )
+        return v
+    elif dtype.kind in "SV":
+        v = str(base64.standard_b64encode(v), "ascii")
+        return v
+    elif dtype.kind == "U":
+        return v
+    elif dtype.kind in "mM":
+        return int(v.view("i8"))
+    else:
+        return v
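The hdf.py rewrite above is where most of the zarr 3 breakage lands: `require_dataset` becomes `require_array`, `compression=` becomes `compressor=`, attributes are passed at creation time, and the values read back out of a v3 `MemoryStore` are `Buffer` objects that must be converted to bytes before `_encode_for_JSON`. A hedged sketch of that last normalization step, with a hypothetical helper name, assuming the `zarr.core.buffer.cpu.Buffer` type used in the diff:

    import zarr

    def buffers_to_bytes(store_dict: dict) -> dict:
        # v3 memory stores hold Buffer objects; kerchunk's JSON output needs raw bytes
        return {
            key: val.to_bytes() if isinstance(val, zarr.core.buffer.cpu.Buffer) else val
            for key, val in store_dict.items()
        }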
m = fs.get_mapper("time1.zarr") -z = zarr.open(m, mode="w", zarr_version=2) +z = zarr.open(m, mode="w", zarr_format=2) ar = z.create_dataset("time", data=np.array([1], dtype="M8[s]")) ar.attrs.update({"_ARRAY_DIMENSIONS": ["time"]}) ar = z.create_dataset("data", data=arr) ar.attrs.update({"_ARRAY_DIMENSIONS": ["time", "x", "y"]}) m = fs.get_mapper("time2.zarr") -z = zarr.open(m, mode="w", zarr_version=2) +z = zarr.open(m, mode="w", zarr_format=2) ar = z.create_dataset("time", data=np.array([2], dtype="M8[s]")) ar.attrs.update({"_ARRAY_DIMENSIONS": ["time"]}) ar = z.create_dataset("data", data=arr) @@ -272,7 +272,7 @@ def test_get_coos(refs, selector, expected): mzz.first_pass() assert mzz.coos["time"].tolist() == expected mzz.store_coords() - g = zarr.open(mzz.out, zarr_version=2) + g = zarr.open(mzz.out, zarr_format=2) assert g["time"][:].tolist() == expected assert dict(g.attrs) diff --git a/kerchunk/tests/test_combine_concat.py b/kerchunk/tests/test_combine_concat.py index f51f10e8..23e785df 100644 --- a/kerchunk/tests/test_combine_concat.py +++ b/kerchunk/tests/test_combine_concat.py @@ -51,7 +51,7 @@ def test_success(tmpdir, arrays, chunks, axis, m): refs = [] for i, x in enumerate(arrays): fn = f"{tmpdir}/out{i}.zarr" - g = zarr.open(fn, zarr_version=2) + g = zarr.open(fn, zarr_format=2) g.create_dataset("x", data=x, chunks=chunks) fns.append(fn) ref = kerchunk.zarr.single_zarr(fn, inline=0) @@ -62,7 +62,7 @@ def test_success(tmpdir, arrays, chunks, axis, m): ) mapper = fsspec.get_mapper("reference://", fo=out) - g = zarr.open(mapper, zarr_version=2) + g = zarr.open(mapper, zarr_format=2) assert (g.x[:] == np.concatenate(arrays, axis=axis)).all() try: @@ -76,7 +76,7 @@ def test_success(tmpdir, arrays, chunks, axis, m): remote_protocol="file", skip_instance_cache=True, ) - g = zarr.open(mapper, zarr_version=2) + g = zarr.open(mapper, zarr_format=2) assert (g.x[:] == np.concatenate(arrays, axis=axis)).all() kerchunk.df.refs_to_dataframe(out, "memory://out.parq", record_size=1) @@ -86,7 +86,7 @@ def test_success(tmpdir, arrays, chunks, axis, m): remote_protocol="file", skip_instance_cache=True, ) - g = zarr.open(mapper, zarr_version=2) + g = zarr.open(mapper, zarr_format=2) assert (g.x[:] == np.concatenate(arrays, axis=axis)).all() @@ -95,9 +95,9 @@ def test_fail_chunks(tmpdir): fn2 = f"{tmpdir}/out2.zarr" x1 = np.arange(10) x2 = np.arange(10, 20) - g = zarr.open(fn1, zarr_version=2) + g = zarr.open(fn1, zarr_format=2) g.create_dataset("x", data=x1, chunks=(2,)) - g = zarr.open(fn2, zarr_version=2) + g = zarr.open(fn2, zarr_format=2) g.create_dataset("x", data=x2, chunks=(3,)) ref1 = kerchunk.zarr.single_zarr(fn1, inline=0) @@ -112,9 +112,9 @@ def test_fail_shape(tmpdir): fn2 = f"{tmpdir}/out2.zarr" x1 = np.arange(12).reshape(6, 2) x2 = np.arange(12, 24) - g = zarr.open(fn1, zarr_version=2) + g = zarr.open(fn1, zarr_format=2) g.create_dataset("x", data=x1, chunks=(2,)) - g = zarr.open(fn2, zarr_version=2) + g = zarr.open(fn2, zarr_format=2) g.create_dataset("x", data=x2, chunks=(2,)) ref1 = kerchunk.zarr.single_zarr(fn1, inline=0) @@ -129,9 +129,9 @@ def test_fail_irregular_chunk_boundaries(tmpdir): fn2 = f"{tmpdir}/out2.zarr" x1 = np.arange(10) x2 = np.arange(10, 24) - g = zarr.open(fn1, zarr_version=2) + g = zarr.open(fn1, zarr_format=2) g.create_dataset("x", data=x1, chunks=(4,)) - g = zarr.open(fn2, zarr_version=2) + g = zarr.open(fn2, zarr_format=2) g.create_dataset("x", data=x2, chunks=(4,)) ref1 = kerchunk.zarr.single_zarr(fn1, inline=0) diff --git a/kerchunk/tests/test_fits.py 
b/kerchunk/tests/test_fits.py index e7211479..5d7c3b6d 100644 --- a/kerchunk/tests/test_fits.py +++ b/kerchunk/tests/test_fits.py @@ -18,7 +18,7 @@ def test_ascii_table(): url = "https://fits.gsfc.nasa.gov/samples/WFPC2u5780205r_c0fx.fits" out = kerchunk.fits.process_file(url, extension=1) m = fsspec.get_mapper("reference://", fo=out, remote_protocol="https") - g = zarr.open(m, zarr_version=2) + g = zarr.open(m, zarr_format=2) arr = g["u5780205r_cvt.c0h.tab"][:] with fsspec.open( "https://fits.gsfc.nasa.gov/samples/WFPC2u5780205r_c0fx.fits" @@ -31,7 +31,7 @@ def test_ascii_table(): def test_binary_table(): out = kerchunk.fits.process_file(btable, extension=1) m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_version=2) + z = zarr.open(m, zarr_format=2) arr = z["1"] with open(btable, "rb") as f: hdul = fits.open(f) @@ -48,7 +48,7 @@ def test_binary_table(): def test_cube(): out = kerchunk.fits.process_file(range_im) m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_version=2) + z = zarr.open(m, zarr_format=2) arr = z["PRIMARY"] with open(range_im, "rb") as f: hdul = fits.open(f) @@ -61,7 +61,7 @@ def test_with_class(): out = ftz.translate() assert "fits" in repr(ftz) m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_version=2) + z = zarr.open(m, zarr_format=2) arr = z["PRIMARY"] with open(range_im, "rb") as f: hdul = fits.open(f) @@ -76,7 +76,7 @@ def test_var(): ftz = kerchunk.fits.FitsToZarr(var) out = ftz.translate() m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_version=2) + z = zarr.open(m, zarr_format=2) arr = z["1"] vars = [_.tolist() for _ in arr["var"]] diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index 91ae9ac7..9102529e 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -119,7 +119,7 @@ def test_grib_tree(): corrected_msg_groups = [correct_hrrr_subhf_step(msg) for msg in scanned_msg_groups] result = grib_tree(corrected_msg_groups) fs = fsspec.filesystem("reference", fo=result) - zg = zarr.open_group(fs.get_mapper(""), zarr_version=2) + zg = zarr.open_group(fs.get_mapper(""), zarr_format=2) assert isinstance(zg["refc/instant/atmosphere/refc"], zarr.Array) assert isinstance(zg["vbdsf/avg/surface/vbdsf"], zarr.Array) assert set(zg["vbdsf/avg/surface"].attrs["coordinates"].split()) == set( @@ -147,14 +147,14 @@ def test_correct_hrrr_subhf_group_step(): scanned_msgs = ujson.load(fobj) original_zg = [ - zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""), zarr_version=2) + zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""), zarr_format=2) for val in scanned_msgs ] corrected_msgs = [correct_hrrr_subhf_step(msg) for msg in scanned_msgs] corrected_zg = [ - zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""), zarr_version=2) + zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""), zarr_format=2) for val in corrected_msgs ] @@ -177,7 +177,7 @@ def test_hrrr_subhf_corrected_grib_tree(): corrected_msgs = [correct_hrrr_subhf_step(msg) for msg in scanned_msgs] merged = grib_tree(corrected_msgs) - zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""), zarr_version=2) + zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""), zarr_format=2) # Check the values and shape of the time coordinates assert zg.u.instant.heightAboveGround.step[:].tolist() == [ 0.0, @@ -220,7 +220,7 @@ def test_hrrr_sfcf_grib_tree(): with open(fpath, "rb") as fobj: 
scanned_msgs = ujson.load(fobj) merged = grib_tree(scanned_msgs) - zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""), zarr_version=2) + zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""), zarr_format=2) # Check the heightAboveGround level shape of the time coordinates assert zg.u.instant.heightAboveGround.heightAboveGround[()] == 80.0 assert zg.u.instant.heightAboveGround.heightAboveGround.shape == () diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index 2f825e6d..e140ca48 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -18,6 +18,7 @@ def test_single(): """Test creating references for a single HDF file""" url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp" so = dict(anon=True, default_fill_cache=False, default_cache_type="none") + with fsspec.open(url, **so) as f: h5chunks = SingleHdf5ToZarr(f, url, storage_options=so) test_dict = h5chunks.translate() @@ -25,6 +26,8 @@ def test_single(): m = fsspec.get_mapper( "reference://", fo=test_dict, remote_protocol="s3", remote_options=so ) + x = [(k, v) for (k, v) in m.items()] + raise ValueError("foo") ds = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False)) with fsspec.open(url, **so) as f: @@ -193,7 +196,7 @@ def test_string_embed(): out = h.translate() fs = fsspec.filesystem("reference", fo=out) assert txt in fs.references["vlen_str/0"] - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) assert z.vlen_str.dtype == "O" assert z.vlen_str[0] == txt assert (z.vlen_str[1:] == "").all() @@ -204,7 +207,7 @@ def test_string_null(): h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="null", inline_threshold=0) out = h.translate() fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) assert z.vlen_str.dtype == "O" assert (z.vlen_str[:] == None).all() @@ -217,7 +220,7 @@ def test_string_leave(): ) out = h.translate() fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) assert z.vlen_str.dtype == "S16" assert z.vlen_str[0] # some obscured ID assert (z.vlen_str[1:] == b"").all() @@ -232,7 +235,7 @@ def test_string_decode(): out = h.translate() fs = fsspec.filesystem("reference", fo=out) assert txt in fs.cat("vlen_str/.zarray").decode() # stored in filter def - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) assert z.vlen_str[0] == txt assert (z.vlen_str[1:] == "").all() @@ -243,7 +246,7 @@ def test_compound_string_null(): h = kerchunk.hdf.SingleHdf5ToZarr(f, fn, vlen_encode="null", inline_threshold=0) out = h.translate() fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) assert z.vlen_str[0].tolist() == (10, None) assert (z.vlen_str["ints"][1:] == 0).all() assert (z.vlen_str["strs"][1:] == None).all() @@ -257,7 +260,7 @@ def test_compound_string_leave(): ) out = h.translate() fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) assert z.vlen_str["ints"][0] == 10 assert z.vlen_str["strs"][0] # random ID assert (z.vlen_str["ints"][1:] == 0).all() @@ -272,7 +275,7 @@ def test_compound_string_encode(): ) out = h.translate() fs = fsspec.filesystem("reference", 
fo=out) - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) assert z.vlen_str["ints"][0] == 10 assert z.vlen_str["strs"][0] == "water" assert (z.vlen_str["ints"][1:] == 0).all() @@ -303,7 +306,7 @@ def test_compress(): continue out = h.translate() m = fsspec.get_mapper("reference://", fo=out) - g = zarr.open(m, zarr_version=2) + g = zarr.open(m, zarr_format=2) assert np.mean(g.data) == 49.5 @@ -313,7 +316,7 @@ def test_embed(): out = h.translate() fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) data = z["Domain_10"]["STER"]["min_1"]["boom_1"]["temperature"][:] assert data[0].tolist() == [ "2014-04-01 00:00:00.0", @@ -348,7 +351,7 @@ def test_translate_links(): preserve_linked_dsets=True ) fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_version=2) + z = zarr.open(fs.get_mapper(), zarr_format=2) # 1. Test the hard linked datasets were translated correctly # 2. Test the soft linked datasets were translated correctly diff --git a/kerchunk/tests/test_tiff.py b/kerchunk/tests/test_tiff.py index 4011a67a..74ba59a4 100644 --- a/kerchunk/tests/test_tiff.py +++ b/kerchunk/tests/test_tiff.py @@ -16,7 +16,7 @@ def test_one(): fn = files[0] out = kerchunk.tiff.tiff_to_zarr(fn) m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_version=2) + z = zarr.open(m, zarr_format=2) assert list(z) == ["0", "1", "2"] assert z.attrs["multiscales"] == [ { @@ -34,7 +34,7 @@ def test_coord(): fn = files[0] out = kerchunk.tiff.tiff_to_zarr(fn) m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_version=2) # highest res is the one xarray picks + z = zarr.open(m, zarr_format=2) # highest res is the one xarray picks out = kerchunk.tiff.generate_coords(z.attrs, z[0].shape) ds = xr.open_dataset(fn) diff --git a/kerchunk/tests/test_utils.py b/kerchunk/tests/test_utils.py index 8e4502c1..a951c36c 100644 --- a/kerchunk/tests/test_utils.py +++ b/kerchunk/tests/test_utils.py @@ -79,13 +79,13 @@ def test_inline_array(): assert "data/1" not in out2 assert json.loads(out2["data/.zattrs"]) == json.loads(refs["data/.zattrs"]) fs = fsspec.filesystem("reference", fo=out2) - g = zarr.open(fs.get_mapper(), zarr_version=2) + g = zarr.open(fs.get_mapper(), zarr_format=2) assert g.data[:].tolist() == [1, 2] out3 = kerchunk.utils.inline_array(refs, threshold=1000) # inlines because of size assert "data/1" not in out3 fs = fsspec.filesystem("reference", fo=out3) - g = zarr.open(fs.get_mapper(), zarr_version=2) + g = zarr.open(fs.get_mapper(), zarr_format=2) assert g.data[:].tolist() == [1, 2] @@ -99,7 +99,7 @@ def test_json(): @pytest.mark.parametrize("chunks", [[10, 10], [5, 10]]) def test_subchunk_exact(m, chunks): store = m.get_mapper("test.zarr") - g = zarr.open_group(store, mode="w", zarr_version=2) + g = zarr.open_group(store, mode="w", zarr_format=2) data = np.arange(100).reshape(10, 10) arr = g.create_dataset("data", data=data, chunks=chunks, compression=None) ref = kerchunk.zarr.single_zarr("memory://test.zarr")["refs"] @@ -114,7 +114,7 @@ def test_subchunk_exact(m, chunks): ] g2 = zarr.open_group( - "reference://", storage_options={"fo": out, "remote_protocol": "memory"}, zarr_version=2 + "reference://", storage_options={"fo": out, "remote_protocol": "memory"}, zarr_format=2 ) assert (g2.data[:] == data).all() diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 4049ee63..b52a9c0b 100644 --- a/kerchunk/utils.py +++ 
b/kerchunk/utils.py @@ -226,7 +226,7 @@ def inline_array(store, threshold=1000, names=None, remote_options=None): fs = fsspec.filesystem( "reference", fo=store, **(remote_options or {}), skip_instance_cache=True ) - g = zarr.open_group(fs.get_mapper(), mode="r+", zarr_version=2) + g = zarr.open_group(fs.get_mapper(), mode="r+", zarr_format=2) _inline_array(g, threshold, names=names or []) return fs.references From 5aef233686c89dc9ca56325f1c654e35a80e8440 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 8 Oct 2024 17:13:36 -0400 Subject: [PATCH 05/40] Save progress --- kerchunk/tests/test_hdf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index e140ca48..4135495b 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -6,6 +6,7 @@ import pytest import xarray as xr import zarr +from zarr.storage import MemoryStore import h5py from kerchunk.hdf import SingleHdf5ToZarr, has_visititems_links @@ -26,9 +27,8 @@ def test_single(): m = fsspec.get_mapper( "reference://", fo=test_dict, remote_protocol="s3", remote_options=so ) - x = [(k, v) for (k, v) in m.items()] - raise ValueError("foo") - ds = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False)) + store = MemoryStore(m) + ds = xr.open_dataset(store, engine="zarr", backend_kwargs=dict(consolidated=False)) with fsspec.open(url, **so) as f: expected = xr.open_dataset(f, engine="h5netcdf") From b9323d2e227bd7b163492afe2e7a1f5eec6bda91 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 8 Oct 2024 20:37:52 -0400 Subject: [PATCH 06/40] Cleanup, almost working with hdf --- kerchunk/hdf.py | 12 +++------- kerchunk/tests/test_hdf.py | 45 +++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py index 4073a2b3..501de4f3 100644 --- a/kerchunk/hdf.py +++ b/kerchunk/hdf.py @@ -115,11 +115,11 @@ def __init__( self.store_dict = out or {} if Version(zarr.__version__) < Version("3.0.0.a0"): self.store = zarr.storage.KVStore(self.store_dict) + self._zroot = zarr.group(store=self.store, overwrite=True) else: self.store = zarr.storage.MemoryStore(mode="a", store_dict=self.store_dict) - # self.store = out or {} - self._zroot = zarr.group(store=self.store, zarr_format=2, overwrite=True) - + self._zroot = zarr.group(store=self.store, zarr_format=2, overwrite=True) + self._uri = url self.error = error lggr.debug(f"HDF5 file URI: {self._uri}") @@ -146,13 +146,8 @@ def translate(self, preserve_linked_dsets=False): """ lggr.debug("Translation begins") self._transfer_attrs(self._h5f, self._zroot) - - print('transfer done') - self._h5f.visititems(self._translator) - print('visit done') - if preserve_linked_dsets: if not has_visititems_links(): raise RuntimeError( @@ -542,7 +537,6 @@ def _translator( self._transfer_attrs(h5obj, zgrp) except Exception as e: import traceback - raise e msg = "\n".join( [ diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index 4135495b..e2806545 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -1,6 +1,9 @@ import fsspec import os.path as osp +import fsspec.implementations +import fsspec.implementations.reference + import kerchunk.hdf import numpy as np import pytest @@ -9,6 +12,8 @@ from zarr.storage import MemoryStore import h5py +from packaging.version import Version + from kerchunk.hdf import SingleHdf5ToZarr, has_visititems_links from kerchunk.combine import MultiZarrToZarr, drop 
@@ -24,11 +29,15 @@ def test_single(): h5chunks = SingleHdf5ToZarr(f, url, storage_options=so) test_dict = h5chunks.translate() - m = fsspec.get_mapper( - "reference://", fo=test_dict, remote_protocol="s3", remote_options=so - ) - store = MemoryStore(m) - ds = xr.open_dataset(store, engine="zarr", backend_kwargs=dict(consolidated=False)) + if Version(zarr.__version__) < Version("3.0.0.a0"): + store = fsspec.get_mapper( + "reference://", fo=test_dict, remote_protocol="s3", remote_options=so + ) + else: + fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict) + store = zarr.storage.RemoteStore(fs, mode="r") + + ds = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)) with fsspec.open(url, **so) as f: expected = xr.open_dataset(f, engine="h5netcdf") @@ -45,22 +54,32 @@ def test_single_direct_open(): h5f=url, inline_threshold=300, storage_options=so ).translate() - m = fsspec.get_mapper( - "reference://", fo=test_dict, remote_protocol="s3", remote_options=so - ) + if Version(zarr.__version__) < Version("3.0.0.a0"): + store = fsspec.get_mapper( + "reference://", fo=test_dict, remote_protocol="s3", remote_options=so + ) + else: + fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict) + store = zarr.storage.RemoteStore(fs, mode="r") + ds_direct = xr.open_dataset( - m, engine="zarr", backend_kwargs=dict(consolidated=False) + store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) ) with fsspec.open(url, **so) as f: h5chunks = SingleHdf5ToZarr(f, url, storage_options=so) test_dict = h5chunks.translate() - m = fsspec.get_mapper( - "reference://", fo=test_dict, remote_protocol="s3", remote_options=so - ) + if Version(zarr.__version__) < Version("3.0.0.a0"): + store = fsspec.get_mapper( + "reference://", fo=test_dict, remote_protocol="s3", remote_options=so + ) + else: + fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict) + store = zarr.storage.RemoteStore(fs, mode="r") + ds_from_file_opener = xr.open_dataset( - m, engine="zarr", backend_kwargs=dict(consolidated=False) + store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) ) xr.testing.assert_equal( From 0f1711944159edcbcce563cf5b7c8bde1e5e5348 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 8 Oct 2024 21:46:49 -0400 Subject: [PATCH 07/40] Closer... 
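
The remaining gap this commit closes is key layout: the v3 store records group members under keys with a leading "/", while fsspec reference sets are rooted without one, so both the copied metadata keys and the chunk keys built in _translator are normalized with str.removeprefix. A sketch of the key shape being produced, assuming a hypothetical HDF5 dataset named "/data" with chunk index (0, 0):

    h5_name = "/data"  # h5py object names are absolute
    chunk_index = (0, 0)
    key = str.removeprefix(h5_name, "/") + "/" + ".".join(map(str, chunk_index))
    assert key == "data/0.0"  # the form reference filesystems expect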
---
 kerchunk/hdf.py            | 14 +++++++++++---
 kerchunk/tests/test_hdf.py |  7 +++++--
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py
index 501de4f3..5e4d2304 100644
--- a/kerchunk/hdf.py
+++ b/kerchunk/hdf.py
@@ -162,9 +162,16 @@ def translate(self, preserve_linked_dsets=False):
            self.store.flush()
            return self.store
        else:
+            keys_to_remove = []
+            new_keys = {}
            for k, v in self.store_dict.items():
                if isinstance(v, zarr.core.buffer.cpu.Buffer):
-                    self.store_dict[k] = v.to_bytes()
+                    key = str.removeprefix(k, "/")
+                    new_keys[key] = v.to_bytes()
+                    keys_to_remove.append(k)
+            for k in keys_to_remove:
+                del self.store_dict[k]
+            self.store_dict.update(new_keys)
            store = _encode_for_JSON(self.store_dict)
            return {"version": 1, "refs": store}
@@ -495,7 +502,7 @@ def _translator(
                **kwargs,
            )
            lggr.debug(f"Created Zarr array: {za}")
-            #self._transfer_attrs(h5obj, za)
+            self._transfer_attrs(h5obj, za)

            # za.attrs["_ARRAY_DIMENSIONS"] = adims
            lggr.debug(f"_ARRAY_DIMENSIONS = {adims}")
@@ -509,7 +516,8 @@ def _translator(
                    if h5obj.fletcher32:
                        logging.info("Discarding fletcher32 checksum")
                        v["size"] -= 4
-                    key = ".".join(map(str, k))
+                    key = str.removeprefix(h5obj.name, "/") + "/" + ".".join(map(str, k))
+
                    if (
                        self.inline
                        and isinstance(v, dict)
diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py
index e2806545..2fe4e1cf 100644
--- a/kerchunk/tests/test_hdf.py
+++ b/kerchunk/tests/test_hdf.py
@@ -1,3 +1,4 @@
+import asyncio
 import fsspec
 import os.path as osp
@@ -9,8 +10,6 @@
 import pytest
 import xarray as xr
 import zarr
-from zarr.storage import MemoryStore
-import h5py

 from packaging.version import Version
@@ -19,6 +19,10 @@
 here = osp.dirname(__file__)

+async def list_dir(store, path):
+    [x async for x in store.list_dir(path)]
+
+
 def test_single():
    """Test creating references for a single HDF file"""
    url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp"
    so = dict(anon=True, default_fill_cache=False, default_cache_type="none")

    with fsspec.open(url, **so) as f:
        h5chunks = SingleHdf5ToZarr(f, url, storage_options=so)
        test_dict = h5chunks.translate()

    if Version(zarr.__version__) < Version("3.0.0.a0"):
        store = fsspec.get_mapper(
            "reference://", fo=test_dict, remote_protocol="s3", remote_options=so
        )
    else:
        fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict)
        store = zarr.storage.RemoteStore(fs, mode="r")

    ds = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False))

From 5c8806bf272334b59cfdba13a9d746cef9e51329 Mon Sep 17 00:00:00 2001
From: Matthew Iannucci
Date: Wed, 9 Oct 2024 14:18:17 -0400
Subject: [PATCH 08/40] Updating tests

---
 kerchunk/hdf.py            |  1 +
 kerchunk/tests/test_hdf.py | 63 ++++++++++++++------------------------
 2 files changed, 24 insertions(+), 40 deletions(-)

diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py
index 5e4d2304..6bb16922 100644
--- a/kerchunk/hdf.py
+++ b/kerchunk/hdf.py
@@ -705,6 +705,7 @@ def _is_netcdf_variable(dataset: h5py.Dataset):
def has_visititems_links():
    return hasattr(h5py.Group, "visititems_links")

+
def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any:
    # early out
    if v is None:
        return v
diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py
index 2fe4e1cf..ace45472 100644
--- a/kerchunk/tests/test_hdf.py
+++ b/kerchunk/tests/test_hdf.py
@@ -23,6 +23,16 @@ async def list_dir(store, path):
    [x async for x in store.list_dir(path)]

+def create_store(test_dict: dict):
+    if Version(zarr.__version__) < Version("3.0.0.a0"):
+        return fsspec.get_mapper(
+            "reference://", fo=test_dict, remote_protocol="s3", remote_options=so
+        )
+    else:
+        fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict)
+        return zarr.storage.RemoteStore(fs, mode="r")
+
+
 def test_single():
    """Test creating references for a single HDF file"""
    url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp"
@@ -32,13 +42,7 @@ def test_single():
        h5chunks = SingleHdf5ToZarr(f, url, storage_options=so)
        test_dict = h5chunks.translate()

-    if Version(zarr.__version__) < Version("3.0.0.a0"):
-        store = fsspec.get_mapper(
-            "reference://", fo=test_dict, remote_protocol="s3", remote_options=so
-        )
-    else:
-        fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict)
-        store = zarr.storage.RemoteStore(fs, mode="r")
+    store = create_store(test_dict)

    ds = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False))
@@ -57,13 +61,7 @@ def test_single_direct_open():
        h5f=url, inline_threshold=300, storage_options=so
    ).translate()

-    if Version(zarr.__version__) < Version("3.0.0.a0"):
-        store = fsspec.get_mapper(
-            "reference://", fo=test_dict, remote_protocol="s3", remote_options=so
-        )
-    else:
-        fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict)
-        store = zarr.storage.RemoteStore(fs, mode="r")
+    store = create_store(test_dict)

    ds_direct = xr.open_dataset(
        store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)
    )

    with fsspec.open(url, **so) as f:
        h5chunks = SingleHdf5ToZarr(f, url, storage_options=so)
        test_dict = h5chunks.translate()

-    if Version(zarr.__version__) < Version("3.0.0.a0"):
-        store = fsspec.get_mapper(
-            "reference://", fo=test_dict, remote_protocol="s3", remote_options=so
-        )
-    else:
-        fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict)
-        store = zarr.storage.RemoteStore(fs, mode="r")
+    store = create_store(test_dict)

    ds_from_file_opener = xr.open_dataset(
        store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)
    )

    xr.testing.assert_equal(
@@ -97,11 +95,8 @@ def test_multizarr(generate_mzz):
    """Test creating a combined reference file with MultiZarrToZarr"""
    mzz = generate_mzz
    test_dict = mzz.translate()
-
-    m = fsspec.get_mapper(
-        "reference://", fo=test_dict, remote_protocol="s3", remote_options=so
-    )
-    ds = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False))
+    store = create_store(test_dict)
+    ds = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False))

    with fsspec.open_files(urls, **so) as fs:
        expts = [xr.open_dataset(f, engine="h5netcdf") for f in fs]
@@ -172,11 +167,8 @@ def test_times(times_data):
        h5chunks = SingleHdf5ToZarr(f, url)
        test_dict = h5chunks.translate()

-    m = fsspec.get_mapper(
-        "reference://",
-        fo=test_dict,
-    )
-    result = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False))
+    store = create_store(test_dict)
+    result = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False))
    expected = x1.to_dataset()
    xr.testing.assert_equal(result, expected)
@@ -185,11 +177,8 @@ def test_times_str(times_data):
    h5chunks = SingleHdf5ToZarr(url)
    test_dict = h5chunks.translate()

-    m = fsspec.get_mapper(
-        "reference://",
-        fo=test_dict,
-    )
-    result = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False))
+    store = create_store(test_dict)
+    result = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False))
    expected = x1.to_dataset()
    xr.testing.assert_equal(result, expected)
@@ -310,8 +299,8 @@ def test_compress():
        h.translate()
        continue
    out = h.translate()
-    m = fsspec.get_mapper("reference://", fo=out)
-    g = zarr.open(m, zarr_format=2)
+    store = create_store(out)
+    g = zarr.open(store, zarr_format=2)
    assert np.mean(g.data) == 49.5
@@ -320,8 +309,8 @@ def test_embed():
    h = kerchunk.hdf.SingleHdf5ToZarr(fn, vlen_encode="embed")
    out = h.translate()

-    fs = fsspec.filesystem("reference", fo=out)
-    z = zarr.open(fs.get_mapper(), zarr_format=2)
+    store = create_store(out)
+    z = zarr.open(store, zarr_format=2)
    data = z["Domain_10"]["STER"]["min_1"]["boom_1"]["temperature"][:]
    assert data[0].tolist() == [
        "2014-04-01 00:00:00.0",

From 80fedcde9a6768761ee2f36bb2ae63b6310d4492 Mon Sep 17 00:00:00 2001
From: Matthew Iannucci
Date: Thu, 10 Oct 2024 13:39:25 -0400
Subject: [PATCH 09/40] reorganize

---
 kerchunk/hdf.py            | 51 ++------------------------------------
 kerchunk/tests/test_hdf.py | 14 ++++++++---
 kerchunk/utils.py          | 44 ++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 53 deletions(-)

diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py
index 6bb16922..6b7b443d 100644
--- a/kerchunk/hdf.py
+++ b/kerchunk/hdf.py
@@ -1,7 +1,7 @@
 import base64
 import io
 import logging
-from typing import Union, BinaryIO, Any, cast
+from typing import Union, BinaryIO
 from packaging.version import Version

 import fsspec.core
 from fsspec.implementations.reference import LazyReferenceMapper
@@ -11,7 +11,7 @@
 import numcodecs

 from .codecs import FillStringsCodec
-from .utils import _encode_for_JSON
+from .utils import _encode_for_JSON, encode_fill_value

 try:
    import h5py
@@ -22,12 +22,6 @@
    "for more details."
 )

-# try:
-#     from zarr.meta import encode_fill_value
-# except ModuleNotFoundError:
-#     # https://github.com/zarr-developers/zarr-python/issues/2021
-#     from zarr.v2.meta import encode_fill_value
-
 lggr = logging.getLogger("h5-to-zarr")
 _HIDDEN_ATTRS = {  # from h5netcdf.attrs
@@ -504,7 +498,6 @@ def _translator(
            lggr.debug(f"Created Zarr array: {za}")
            self._transfer_attrs(h5obj, za)

-            # za.attrs["_ARRAY_DIMENSIONS"] = adims
            lggr.debug(f"_ARRAY_DIMENSIONS = {adims}")

            if "data" in kwargs:
@@ -705,43 +698,3 @@ def _is_netcdf_variable(dataset: h5py.Dataset):
def has_visititems_links():
    return hasattr(h5py.Group, "visititems_links")
-
-
-def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any:
-    ...  # moved verbatim to kerchunk/utils.py (see below)
diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py
index ace45472..665cd392 100644
--- a/kerchunk/tests/test_hdf.py
+++ b/kerchunk/tests/test_hdf.py
@@ -1,5 +1,6 @@
+from typing import Any
 import fsspec
+import json
 import os.path as osp

 import fsspec.implementations
 import fsspec.implementations.reference
@@ -24,11 +25,13 @@ async def list_dir(store, path):
    [x async for x in store.list_dir(path)]

-def create_store(test_dict: dict):
+def create_store(test_dict: dict, remote_options: Any = None):
    if Version(zarr.__version__) < Version("3.0.0.a0"):
        return fsspec.get_mapper(
            "reference://", fo=test_dict, remote_protocol="s3", remote_options=so
        )
    else:
-        fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict)
+        fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict, remote_options=remote_options)
        return zarr.storage.RemoteStore(fs, mode="r")


def test_single():
    """Test creating references for a single HDF file"""
-    url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp"
+    #url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp"
+    url = "s3://noaa-nos-ofs-pds/ngofs2/netcdf/202410/ngofs2.t03z.20241001.2ds.f020.nc"
    so = dict(anon=True, default_fill_cache=False, default_cache_type="none")

    with fsspec.open(url, **so) as f:
        h5chunks = SingleHdf5ToZarr(f, url, storage_options=so)
        test_dict = h5chunks.translate()

+    with open("test_dict.json", "w") as f:
+        json.dump(test_dict, f)
+
    store = create_store(test_dict)
@@ -97,6 +102,7 @@ def test_multizarr(generate_mzz):
    """Test creating a combined reference file with MultiZarrToZarr"""
    mzz = generate_mzz
    test_dict = mzz.translate()
+
    store = create_store(test_dict)
    ds = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False))
diff --git a/kerchunk/utils.py b/kerchunk/utils.py
index b52a9c0b..a0f9e96e 100644
--- a/kerchunk/utils.py
+++ b/kerchunk/utils.py
@@ -1,11 +1,13 @@
 import base64
 import copy
 import itertools
+from typing import Any, cast
 import warnings

 import ujson

 import fsspec
+import numpy as np
 import zarr
@@ -134,6 +136,48 @@ def _encode_for_JSON(store):
    return store


+def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any:
+    # early out
+    if v is None:
+        return v
+    if dtype.kind == "V" and dtype.hasobject:
+        if object_codec is None:
+            raise ValueError("missing object_codec for object array")
+        v = object_codec.encode(v)
+        v = str(base64.standard_b64encode(v), "ascii")
+        return v
+    if dtype.kind == "f":
+        if np.isnan(v):
+            return "NaN"
+        elif np.isposinf(v):
+            return "Infinity"
+        elif np.isneginf(v):
+            return "-Infinity"
+        else:
+            return float(v)
+    elif dtype.kind in "ui":
+        return int(v)
+    elif dtype.kind == "b":
+        return bool(v)
+    elif dtype.kind in "c":
+        c = cast(np.complex128, np.dtype(complex).type())
+        v = (
+            encode_fill_value(v.real, c.real.dtype, object_codec),
+            encode_fill_value(v.imag, c.imag.dtype, object_codec),
+        )
+        return v
+    elif dtype.kind in "SV":
+        v = str(base64.standard_b64encode(v), "ascii")
+        return v
+    elif dtype.kind == "U":
+        return v
+    elif dtype.kind in "mM":
+        return int(v.view("i8"))
+    else:
+        return v
+
+
def do_inline(store, threshold, remote_options=None, remote_protocol=None):
    """Replace short chunks with the value of that chunk and inline metadata

From 1f69a0b129455ed712b1513ebf362c1c3be17b2f Mon Sep 17 00:00:00 2001
From: Matthew Iannucci
Date: Thu, 10 Oct 2024 13:48:28 -0400
Subject: [PATCH 10/40] Save progress

---
 kerchunk/netCDF3.py           | 13 ++++++++++---
 kerchunk/tests/test_hdf.py    |  2 +-
 kerchunk/tests/test_netcdf.py | 20 ++++++++++++++++++--
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/kerchunk/netCDF3.py b/kerchunk/netCDF3.py
index d44fc808..b9d47063 100644
--- a/kerchunk/netCDF3.py
+++ b/kerchunk/netCDF3.py
@@ -1,4 +1,5 @@
 from functools import reduce
+from packaging.version import Version
 from operator import mul

 import numpy as np
@@ -167,7 +168,13 @@ def translate(self):
        import zarr

        out = self.out
-        z = zarr.open(out, mode="w", zarr_format=2)
+        if Version(zarr.__version__) < Version("3.0.0.a0"):
+            store = zarr.storage.KVStore(out)
+            z = zarr.group(store=store, overwrite=True)
zarr.group(store=store, overwrite=True) + else: + store = zarr.storage.MemoryStore(mode="a", store_dict=out) + z = zarr.open(store, mode="w", zarr_format=2) + for dim, var in self.variables.items(): if dim in self.chunks: shape = self.chunks[dim][-1] @@ -197,7 +204,7 @@ def translate(self): dtype=var.data.dtype, fill_value=fill, chunks=shape, - compression=None, + compressor=None, ) part = ".".join(["0"] * len(shape)) or "0" k = f"{dim}/{part}" @@ -251,7 +258,7 @@ def translate(self): dtype=base, fill_value=fill, chunks=(1,) + dtype.shape, - compression=None, + compressor=None, ) arr.attrs.update( { diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index 665cd392..233a58e4 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -27,7 +27,7 @@ async def list_dir(store, path): def create_store(test_dict: dict, remote_options: Any = None): if Version(zarr.__version__) < Version("3.0.0.a0"): return fsspec.get_mapper( - "reference://", fo=test_dict, remote_protocol="s3", remote_options=so + "reference://", fo=test_dict, remote_protocol="s3", remote_options=remote_options ) else: fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict, remote_options=remote_options) diff --git a/kerchunk/tests/test_netcdf.py b/kerchunk/tests/test_netcdf.py index 43b6021b..0036c0a3 100644 --- a/kerchunk/tests/test_netcdf.py +++ b/kerchunk/tests/test_netcdf.py @@ -1,4 +1,5 @@ import os +from typing import Any import fsspec @@ -7,6 +8,8 @@ import pytest from kerchunk import netCDF3 +import zarr + xr = pytest.importorskip("xarray") @@ -24,16 +27,29 @@ ) +def create_store(test_dict: dict, remote_options: Any = None): + if Version(zarr.__version__) < Version("3.0.0.a0"): + return fsspec.get_mapper( + "reference://", fo=test_dict, remote_protocol="s3", remote_options=remote_options + ) + else: + fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict, remote_options=remote_options) + return zarr.storage.RemoteStore(fs, mode="r") + + def test_one(m): m.pipe("data.nc3", bdata) h = netCDF3.netcdf_recording_file("memory://data.nc3") out = h.translate() + + store = create_store(out, remote_options={"remote_protocol": "memory"}) + ds = xr.open_dataset( - "reference://", + store, engine="zarr", backend_kwargs={ "consolidated": False, - "storage_options": {"fo": out, "remote_protocol": "memory"}, + "zarr_format": "2", }, ) assert (ds.data == data).all() From d556e528ab7f012afef68a9ec70f5bfd96c4470a Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 10 Oct 2024 15:30:11 -0400 Subject: [PATCH 11/40] Refactor to clean things up --- kerchunk/hdf.py | 11 ++--- kerchunk/netCDF3.py | 4 +- kerchunk/tests/test_hdf.py | 90 +++++++++++++++++--------------------- kerchunk/utils.py | 37 +++++++++++++--- kerchunk/zarr.py | 35 +++++++++++++++ 5 files changed, 112 insertions(+), 65 deletions(-) diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py index 6b7b443d..7d416f83 100644 --- a/kerchunk/hdf.py +++ b/kerchunk/hdf.py @@ -10,6 +10,8 @@ import zarr import numcodecs +from kerchunk.zarr import dict_to_store + from .codecs import FillStringsCodec from .utils import _encode_for_JSON, encode_fill_value @@ -107,13 +109,8 @@ def __init__( raise NotImplementedError self.vlen = vlen_encode self.store_dict = out or {} - if Version(zarr.__version__) < Version("3.0.0.a0"): - self.store = zarr.storage.KVStore(self.store_dict) - self._zroot = zarr.group(store=self.store, overwrite=True) - else: - self.store = zarr.storage.MemoryStore(mode="a", store_dict=self.store_dict) - self._zroot 
= zarr.group(store=self.store, zarr_format=2, overwrite=True) - + self.store = dict_to_store(self.store_dict) + self._zroot = zarr.group(store=self.store, zarr_format=2, overwrite=True) self._uri = url self.error = error lggr.debug(f"HDF5 file URI: {self._uri}") diff --git a/kerchunk/netCDF3.py b/kerchunk/netCDF3.py index b9d47063..078a5f7b 100644 --- a/kerchunk/netCDF3.py +++ b/kerchunk/netCDF3.py @@ -198,7 +198,7 @@ def translate(self): fill = float(fill) if fill is not None and var.data.dtype.kind == "i": fill = int(fill) - arr = z.create_dataset( + arr = z.create_array( name=dim, shape=shape, dtype=var.data.dtype, @@ -252,7 +252,7 @@ def translate(self): fill = float(fill) if fill is not None and base.kind == "i": fill = int(fill) - arr = z.create_dataset( + arr = z.create_array( name=name, shape=shape, dtype=base, diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index 233a58e4..8e2117cc 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -1,42 +1,24 @@ -from typing import Any import fsspec import json import os.path as osp -import fsspec.implementations -import fsspec.implementations.reference - import kerchunk.hdf import numpy as np import pytest import xarray as xr import zarr -from packaging.version import Version - from kerchunk.hdf import SingleHdf5ToZarr, has_visititems_links from kerchunk.combine import MultiZarrToZarr, drop +from kerchunk.utils import refs_as_fs, refs_as_store +from kerchunk.zarr import fs_as_store here = osp.dirname(__file__) -async def list_dir(store, path): - [x async for x in store.list_dir(path)] - - -def create_store(test_dict: dict, remote_options: Any = None): - if Version(zarr.__version__) < Version("3.0.0.a0"): - return fsspec.get_mapper( - "reference://", fo=test_dict, remote_protocol="s3", remote_options=remote_options - ) - else: - fs = fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict, remote_options=remote_options) - return zarr.storage.RemoteStore(fs, mode="r") - - def test_single(): """Test creating references for a single HDF file""" - #url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp" + # url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp" url = "s3://noaa-nos-ofs-pds/ngofs2/netcdf/202410/ngofs2.t03z.20241001.2ds.f020.nc" so = dict(anon=True, default_fill_cache=False, default_cache_type="none") @@ -47,9 +29,11 @@ def test_single(): with open("test_dict.json", "w") as f: json.dump(test_dict, f) - store = create_store(test_dict) + store = refs_as_store(test_dict) - ds = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)) + ds = xr.open_dataset( + store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) + ) with fsspec.open(url, **so) as f: expected = xr.open_dataset(f, engine="h5netcdf") @@ -66,7 +50,7 @@ def test_single_direct_open(): h5f=url, inline_threshold=300, storage_options=so ).translate() - store = create_store(test_dict) + store = refs_as_store(test_dict) ds_direct = xr.open_dataset( store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) @@ -76,7 +60,7 @@ def test_single_direct_open(): h5chunks = SingleHdf5ToZarr(f, url, storage_options=so) test_dict = h5chunks.translate() - store = create_store(test_dict) + store = refs_as_store(test_dict) ds_from_file_opener = xr.open_dataset( store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) @@ -103,8 +87,10 @@ def test_multizarr(generate_mzz): mzz = 
generate_mzz test_dict = mzz.translate() - store = create_store(test_dict) - ds = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)) + store = refs_as_store(test_dict) + ds = xr.open_dataset( + store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) + ) with fsspec.open_files(urls, **so) as fs: expts = [xr.open_dataset(f, engine="h5netcdf") for f in fs] @@ -178,8 +164,10 @@ def test_times(times_data): h5chunks = SingleHdf5ToZarr(f, url) test_dict = h5chunks.translate() - store = create_store(test_dict) - result = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)) + store = refs_as_store(test_dict) + result = xr.open_dataset( + store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) + ) expected = x1.to_dataset() xr.testing.assert_equal(result, expected) @@ -191,8 +179,10 @@ def test_times_str(times_data): h5chunks = SingleHdf5ToZarr(url) test_dict = h5chunks.translate() - store = create_store(test_dict) - result = xr.open_dataset(store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)) + store = refs_as_store(test_dict) + result = xr.open_dataset( + store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) + ) expected = x1.to_dataset() xr.testing.assert_equal(result, expected) @@ -205,9 +195,10 @@ def test_string_embed(): fn = osp.join(here, "vlen.h5") h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="embed") out = h.translate() - fs = fsspec.filesystem("reference", fo=out) + fs = refs_as_fs(out) assert txt in fs.references["vlen_str/0"] - z = zarr.open(fs.get_mapper(), zarr_format=2) + store = fs_as_store(fs) + z = zarr.open(store, zarr_format=2) assert z.vlen_str.dtype == "O" assert z.vlen_str[0] == txt assert (z.vlen_str[1:] == "").all() @@ -217,8 +208,8 @@ def test_string_null(): fn = osp.join(here, "vlen.h5") h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="null", inline_threshold=0) out = h.translate() - fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) assert z.vlen_str.dtype == "O" assert (z.vlen_str[:] == None).all() @@ -230,8 +221,8 @@ def test_string_leave(): f, fn, vlen_encode="leave", inline_threshold=0 ) out = h.translate() - fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) assert z.vlen_str.dtype == "S16" assert z.vlen_str[0] # some obscured ID assert (z.vlen_str[1:] == b"").all() @@ -244,9 +235,10 @@ def test_string_decode(): f, fn, vlen_encode="encode", inline_threshold=0 ) out = h.translate() - fs = fsspec.filesystem("reference", fo=out) + fs = refs_as_fs(out) assert txt in fs.cat("vlen_str/.zarray").decode() # stored in filter def - z = zarr.open(fs.get_mapper(), zarr_format=2) + store = fs_as_store(fs) + z = zarr.open(store, zarr_format=2) assert z.vlen_str[0] == txt assert (z.vlen_str[1:] == "").all() @@ -256,8 +248,8 @@ def test_compound_string_null(): with open(fn, "rb") as f: h = kerchunk.hdf.SingleHdf5ToZarr(f, fn, vlen_encode="null", inline_threshold=0) out = h.translate() - fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) assert z.vlen_str[0].tolist() == (10, None) assert (z.vlen_str["ints"][1:] == 0).all() assert (z.vlen_str["strs"][1:] == None).all() @@ -270,8 
+262,8 @@ def test_compound_string_leave(): f, fn, vlen_encode="leave", inline_threshold=0 ) out = h.translate() - fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) assert z.vlen_str["ints"][0] == 10 assert z.vlen_str["strs"][0] # random ID assert (z.vlen_str["ints"][1:] == 0).all() @@ -285,8 +277,8 @@ def test_compound_string_encode(): f, fn, vlen_encode="encode", inline_threshold=0 ) out = h.translate() - fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) assert z.vlen_str["ints"][0] == 10 assert z.vlen_str["strs"][0] == "water" assert (z.vlen_str["ints"][1:] == 0).all() @@ -316,7 +308,7 @@ def test_compress(): h.translate() continue out = h.translate() - store = create_store(out) + store = refs_as_store(out) g = zarr.open(store, zarr_format=2) assert np.mean(g.data) == 49.5 @@ -326,7 +318,7 @@ def test_embed(): h = kerchunk.hdf.SingleHdf5ToZarr(fn, vlen_encode="embed") out = h.translate() - store = create_store(out) + store = refs_as_store(out) z = zarr.open(store, zarr_format=2) data = z["Domain_10"]["STER"]["min_1"]["boom_1"]["temperature"][:] assert data[0].tolist() == [ @@ -361,8 +353,8 @@ def test_translate_links(): out = kerchunk.hdf.SingleHdf5ToZarr(fn, inline_threshold=50).translate( preserve_linked_dsets=True ) - fs = fsspec.filesystem("reference", fo=out) - z = zarr.open(fs.get_mapper(), zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) # 1. Test the hard linked datasets were translated correctly # 2. Test the soft linked datasets were translated correctly diff --git a/kerchunk/utils.py b/kerchunk/utils.py index a0f9e96e..59aad1af 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -10,6 +10,28 @@ import numpy as np import zarr +from kerchunk.zarr import fs_as_store + + +def refs_as_fs(refs, remote_protocol=None, remote_options=None, **kwargs): + """Convert a reference set to an fsspec filesystem""" + fs = fsspec.filesystem( + "reference", + fo=refs, + remote_protocol=remote_protocol, + remote_options=remote_options, + **kwargs, + ) + return fs + + +def refs_as_store(refs, remote_protocol=None, remote_options=None): + """Convert a reference set to a zarr store""" + fs = refs_as_fs( + refs, remote_protocol=remote_protocol, remote_options=remote_options + ) + return fs_as_store(fs) + def class_factory(func): """Experimental uniform API across function-based file scanners""" @@ -74,7 +96,7 @@ def rename_target(refs, renames): ------- dict: the altered reference set, which can be saved """ - fs = fsspec.filesystem("reference", fo=refs) # to produce normalised refs + fs = refs_as_fs(refs) # to produce normalised refs refs = fs.references out = {} for k, v in refs.items(): @@ -136,7 +158,6 @@ def _encode_for_JSON(store): return store - def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: # early out if v is None: @@ -190,6 +211,9 @@ def do_inline(store, threshold, remote_options=None, remote_protocol=None): remote_options=remote_options, remote_protocol=remote_protocol, ) + fs = refs_as_fs( + store, remote_protocol=remote_protocol, remote_options=remote_options + ) out = fs.references.copy() # Inlining is done when one of two conditions are satisfied: @@ -267,10 +291,9 @@ def inline_array(store, threshold=1000, names=None, remote_options=None): ------- amended references set (simple style) """ - fs = 
fsspec.filesystem( - "reference", fo=store, **(remote_options or {}), skip_instance_cache=True - ) - g = zarr.open_group(fs.get_mapper(), mode="r+", zarr_format=2) + fs = refs_as_fs(store, remote_options=remote_options or {}) + zarr_store = fs_as_store(store, mode="r+", remote_options=remote_options or {}) + g = zarr.open_group(zarr_store, mode="r+", zarr_format=2) _inline_array(g, threshold, names=names or []) return fs.references @@ -293,7 +316,7 @@ def subchunk(store, variable, factor): ------- modified store """ - fs = fsspec.filesystem("reference", fo=store) + fs = refs_as_fs(store) store = fs.references meta_file = f"{variable}/.zarray" meta = ujson.loads(fs.cat(meta_file)) diff --git a/kerchunk/zarr.py b/kerchunk/zarr.py index ea0612de..5560ea99 100644 --- a/kerchunk/zarr.py +++ b/kerchunk/zarr.py @@ -1,9 +1,44 @@ +from packaging.version import Version + import fsspec from fsspec.implementations.reference import LazyReferenceMapper +import zarr import kerchunk.utils +def is_zarr3(): + """Check if the installed zarr version is version 3""" + return Version(zarr.__version__) >= Version("3.0.0.a0") + + +def dict_to_store(store_dict: dict): + """Create an in memory zarr store backed by the given dictionary""" + if is_zarr3(): + return zarr.storage.MemoryStore(mode="a", store_dict=store_dict) + else: + return zarr.storage.KVStore(store_dict) + + +def fs_as_store(fs, mode='r', remote_protocol=None, remote_options=None): + """Open the refs as a zarr store + + Parameters + ---------- + refs: dict-like + the references to open + mode: str + + Returns + ------- + zarr.storage.Store or zarr.storage.Mapper, fsspec.AbstractFileSystem + """ + if is_zarr3(): + return zarr.storage.RemoteStore(fs, mode=mode) + else: + return fs.get_mapper() + + def single_zarr( uri_or_store, storage_options=None, From b27e64c5e0d0e13e83e9ae5adb297ec473d8eada Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 10 Oct 2024 16:06:03 -0400 Subject: [PATCH 12/40] Fix circular import --- kerchunk/hdf.py | 5 +---- kerchunk/tests/test_netcdf.py | 17 +++-------------- kerchunk/utils.py | 35 +++++++++++++++++++++++++++++++++-- kerchunk/zarr.py | 35 ----------------------------------- 4 files changed, 37 insertions(+), 55 deletions(-) diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py index 7d416f83..bc00517f 100644 --- a/kerchunk/hdf.py +++ b/kerchunk/hdf.py @@ -2,7 +2,6 @@ import io import logging from typing import Union, BinaryIO -from packaging.version import Version import fsspec.core from fsspec.implementations.reference import LazyReferenceMapper @@ -10,10 +9,8 @@ import zarr import numcodecs -from kerchunk.zarr import dict_to_store - from .codecs import FillStringsCodec -from .utils import _encode_for_JSON, encode_fill_value +from .utils import _encode_for_JSON, encode_fill_value, dict_to_store try: import h5py diff --git a/kerchunk/tests/test_netcdf.py b/kerchunk/tests/test_netcdf.py index 0036c0a3..755823da 100644 --- a/kerchunk/tests/test_netcdf.py +++ b/kerchunk/tests/test_netcdf.py @@ -1,5 +1,4 @@ import os -from typing import Any import fsspec @@ -8,7 +7,7 @@ import pytest from kerchunk import netCDF3 -import zarr +from kerchunk.utils import refs_as_store xr = pytest.importorskip("xarray") @@ -27,29 +26,19 @@ ) -def create_store(test_dict: dict, remote_options: Any = None): - if Version(zarr.__version__) < Version("3.0.0.a0"): - return fsspec.get_mapper( - "reference://", fo=test_dict, remote_protocol="s3", remote_options=remote_options - ) - else: - fs = 
fsspec.implementations.reference.ReferenceFileSystem(fo=test_dict, remote_options=remote_options) - return zarr.storage.RemoteStore(fs, mode="r") - - def test_one(m): m.pipe("data.nc3", bdata) h = netCDF3.netcdf_recording_file("memory://data.nc3") out = h.translate() - store = create_store(out, remote_options={"remote_protocol": "memory"}) + store = refs_as_store(out, remote_protocol="memory") ds = xr.open_dataset( store, engine="zarr", backend_kwargs={ "consolidated": False, - "zarr_format": "2", + "zarr_format": 2, }, ) assert (ds.data == data).all() diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 59aad1af..c90f89fe 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -1,6 +1,7 @@ import base64 import copy import itertools +from packaging.version import Version from typing import Any, cast import warnings @@ -10,8 +11,6 @@ import numpy as np import zarr -from kerchunk.zarr import fs_as_store - def refs_as_fs(refs, remote_protocol=None, remote_options=None, **kwargs): """Convert a reference set to an fsspec filesystem""" @@ -33,6 +32,38 @@ def refs_as_store(refs, remote_protocol=None, remote_options=None): return fs_as_store(fs) +def is_zarr3(): + """Check if the installed zarr version is version 3""" + return Version(zarr.__version__) >= Version("3.0.0.a0") + + +def dict_to_store(store_dict: dict): + """Create an in memory zarr store backed by the given dictionary""" + if is_zarr3(): + return zarr.storage.MemoryStore(mode="a", store_dict=store_dict) + else: + return zarr.storage.KVStore(store_dict) + + +def fs_as_store(fs, mode='r', remote_protocol=None, remote_options=None): + """Open the refs as a zarr store + + Parameters + ---------- + refs: dict-like + the references to open + mode: str + + Returns + ------- + zarr.storage.Store or zarr.storage.Mapper, fsspec.AbstractFileSystem + """ + if is_zarr3(): + return zarr.storage.RemoteStore(fs, mode=mode) + else: + return fs.get_mapper() + + def class_factory(func): """Experimental uniform API across function-based file scanners""" diff --git a/kerchunk/zarr.py b/kerchunk/zarr.py index 5560ea99..ea0612de 100644 --- a/kerchunk/zarr.py +++ b/kerchunk/zarr.py @@ -1,44 +1,9 @@ -from packaging.version import Version - import fsspec from fsspec.implementations.reference import LazyReferenceMapper -import zarr import kerchunk.utils -def is_zarr3(): - """Check if the installed zarr version is version 3""" - return Version(zarr.__version__) >= Version("3.0.0.a0") - - -def dict_to_store(store_dict: dict): - """Create an in memory zarr store backed by the given dictionary""" - if is_zarr3(): - return zarr.storage.MemoryStore(mode="a", store_dict=store_dict) - else: - return zarr.storage.KVStore(store_dict) - - -def fs_as_store(fs, mode='r', remote_protocol=None, remote_options=None): - """Open the refs as a zarr store - - Parameters - ---------- - refs: dict-like - the references to open - mode: str - - Returns - ------- - zarr.storage.Store or zarr.storage.Mapper, fsspec.AbstractFileSystem - """ - if is_zarr3(): - return zarr.storage.RemoteStore(fs, mode=mode) - else: - return fs.get_mapper() - - def single_zarr( uri_or_store, storage_options=None, From 41d6e8e2eb36b09df844755ea4cb7f38a8d3f818 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 10 Oct 2024 16:07:17 -0400 Subject: [PATCH 13/40] Iterate --- kerchunk/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kerchunk/utils.py b/kerchunk/utils.py index c90f89fe..5cab841d 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -323,7 +323,7 @@ def 
inline_array(store, threshold=1000, names=None, remote_options=None): amended references set (simple style) """ fs = refs_as_fs(store, remote_options=remote_options or {}) - zarr_store = fs_as_store(store, mode="r+", remote_options=remote_options or {}) + zarr_store = fs_as_store(fs, mode="r+", remote_options=remote_options or {}) g = zarr.open_group(zarr_store, mode="r+", zarr_format=2) _inline_array(g, threshold, names=names or []) return fs.references From 7ade1a6dc2369583869a2a6d34a6953b223a9e02 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 10 Oct 2024 17:08:19 -0400 Subject: [PATCH 14/40] Change zarr dep --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6e57e223..5eb7c0c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "numcodecs", "numpy", "ujson", - "zarr==3.0.0a7", + "zarr", ] [project.optional-dependencies] From 492ddeebac4d844ce63ee6aa93b14f5ce613efed Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Thu, 10 Oct 2024 17:31:49 -0400 Subject: [PATCH 15/40] More conversion --- kerchunk/fits.py | 7 ++++--- kerchunk/hdf.py | 13 ++----------- kerchunk/netCDF3.py | 11 ++++------- kerchunk/tests/test_fits.py | 22 ++++++++++++---------- kerchunk/tests/test_tiff.py | 10 ++++++---- kerchunk/utils.py | 37 ++++++++++++++++++++++++++++++++++--- 6 files changed, 62 insertions(+), 38 deletions(-) diff --git a/kerchunk/fits.py b/kerchunk/fits.py index f50bef64..f0d4fa8e 100644 --- a/kerchunk/fits.py +++ b/kerchunk/fits.py @@ -8,7 +8,7 @@ from fsspec.implementations.reference import LazyReferenceMapper -from kerchunk.utils import class_factory +from kerchunk.utils import class_factory, dict_to_store from kerchunk.codecs import AsciiTableCodec, VarArrCodec try: @@ -72,7 +72,8 @@ def process_file( storage_options = storage_options or {} out = out or {} - g = zarr.open(out, zarr_format=2) + store = dict_to_store(out) + g = zarr.open_group(store=store, zarr_format=2) with fsspec.open(url, mode="rb", **storage_options) as f: infile = fits.open(f, do_not_scale_image_data=True) @@ -164,7 +165,7 @@ def process_file( # TODO: we could sub-chunk on biggest dimension name = hdu.name or str(ext) arr = g.empty( - name, dtype=dtype, shape=shape, chunks=shape, compression=None, **kwargs + name=name, dtype=dtype, shape=shape, chunks=shape, compressor=None, zarr_format=2, **kwargs ) arr.attrs.update( { diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py index bc00517f..7cb4b5f6 100644 --- a/kerchunk/hdf.py +++ b/kerchunk/hdf.py @@ -10,7 +10,7 @@ import numcodecs from .codecs import FillStringsCodec -from .utils import _encode_for_JSON, encode_fill_value, dict_to_store +from .utils import _encode_for_JSON, encode_fill_value, dict_to_store, translate_refs_serializable try: import h5py @@ -150,16 +150,7 @@ def translate(self, preserve_linked_dsets=False): self.store.flush() return self.store else: - keys_to_remove = [] - new_keys = {} - for k, v in self.store_dict.items(): - if isinstance(v, zarr.core.buffer.cpu.Buffer): - key = str.removeprefix(k, "/") - new_keys[key] = v.to_bytes() - keys_to_remove.append(k) - for k in keys_to_remove: - del self.store_dict[k] - self.store_dict.update(new_keys) + translate_refs_serializable(self.store_dict) store = _encode_for_JSON(self.store_dict) return {"version": 1, "refs": store} diff --git a/kerchunk/netCDF3.py b/kerchunk/netCDF3.py index 078a5f7b..31438bb0 100644 --- a/kerchunk/netCDF3.py +++ b/kerchunk/netCDF3.py @@ -6,7 +6,7 @@ from 
fsspec.implementations.reference import LazyReferenceMapper import fsspec -from kerchunk.utils import _encode_for_JSON, inline_array +from kerchunk.utils import _encode_for_JSON, dict_to_store, inline_array, translate_refs_serializable try: from scipy.io._netcdf import ZERO, NC_VARIABLE, netcdf_file, netcdf_variable @@ -168,12 +168,8 @@ def translate(self): import zarr out = self.out - if Version(zarr.__version__) < Version("3.0.0.a0"): - store = zarr.storage.KVStore(out) - z = zarr.group(store=store, overwrite=True) - else: - store = zarr.storage.MemoryStore(mode="a", store_dict=out) - z = zarr.open(store, mode="w", zarr_format=2) + store = dict_to_store(out) + z = zarr.open(store, mode="w", zarr_format=2, overwrite=True) for dim, var in self.variables.items(): if dim in self.chunks: @@ -302,6 +298,7 @@ def translate(self): out.flush() return out else: + translate_refs_serializable(out) out = _encode_for_JSON(out) return {"version": 1, "refs": out} diff --git a/kerchunk/tests/test_fits.py b/kerchunk/tests/test_fits.py index 5d7c3b6d..de2cad5f 100644 --- a/kerchunk/tests/test_fits.py +++ b/kerchunk/tests/test_fits.py @@ -2,6 +2,8 @@ import fsspec import pytest +from kerchunk.utils import refs_as_store + fits = pytest.importorskip("astropy.io.fits") import kerchunk.fits @@ -17,8 +19,8 @@ def test_ascii_table(): # this one directly hits a remote server - should cache? url = "https://fits.gsfc.nasa.gov/samples/WFPC2u5780205r_c0fx.fits" out = kerchunk.fits.process_file(url, extension=1) - m = fsspec.get_mapper("reference://", fo=out, remote_protocol="https") - g = zarr.open(m, zarr_format=2) + store = refs_as_store(out, remote_protocol="https") + g = zarr.open(store, zarr_format=2) arr = g["u5780205r_cvt.c0h.tab"][:] with fsspec.open( "https://fits.gsfc.nasa.gov/samples/WFPC2u5780205r_c0fx.fits" @@ -30,8 +32,8 @@ def test_ascii_table(): def test_binary_table(): out = kerchunk.fits.process_file(btable, extension=1) - m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) arr = z["1"] with open(btable, "rb") as f: hdul = fits.open(f) @@ -47,8 +49,8 @@ def test_binary_table(): def test_cube(): out = kerchunk.fits.process_file(range_im) - m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) arr = z["PRIMARY"] with open(range_im, "rb") as f: hdul = fits.open(f) @@ -60,8 +62,8 @@ def test_with_class(): ftz = kerchunk.fits.FitsToZarr(range_im) out = ftz.translate() assert "fits" in repr(ftz) - m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) arr = z["PRIMARY"] with open(range_im, "rb") as f: hdul = fits.open(f) @@ -75,8 +77,8 @@ def test_var(): ftz = kerchunk.fits.FitsToZarr(var) out = ftz.translate() - m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) arr = z["1"] vars = [_.tolist() for _ in arr["var"]] diff --git a/kerchunk/tests/test_tiff.py b/kerchunk/tests/test_tiff.py index 74ba59a4..b81e7bab 100644 --- a/kerchunk/tests/test_tiff.py +++ b/kerchunk/tests/test_tiff.py @@ -5,6 +5,8 @@ import pytest import xarray as xr +from kerchunk.utils import refs_as_store + pytest.importorskip("tifffile") pytest.importorskip("rioxarray") import kerchunk.tiff @@ -15,8 +17,8 @@ def test_one(): fn = files[0] out = 
kerchunk.tiff.tiff_to_zarr(fn) - m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_format=2) + store = refs_as_store(out) + z = zarr.open(store, zarr_format=2) assert list(z) == ["0", "1", "2"] assert z.attrs["multiscales"] == [ { @@ -33,8 +35,8 @@ def test_one(): def test_coord(): fn = files[0] out = kerchunk.tiff.tiff_to_zarr(fn) - m = fsspec.get_mapper("reference://", fo=out) - z = zarr.open(m, zarr_format=2) # highest res is the one xarray picks + store = refs_as_store(out) + z = zarr.open(out, zarr_format=2) # highest res is the one xarray picks out = kerchunk.tiff.generate_coords(z.attrs, z[0].shape) ds = xr.open_dataset(fn) diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 5cab841d..71cee56a 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -45,15 +45,15 @@ def dict_to_store(store_dict: dict): return zarr.storage.KVStore(store_dict) -def fs_as_store(fs, mode='r', remote_protocol=None, remote_options=None): +def fs_as_store(fs, mode="r", remote_protocol=None, remote_options=None): """Open the refs as a zarr store - + Parameters ---------- refs: dict-like the references to open mode: str - + Returns ------- zarr.storage.Store or zarr.storage.Mapper, fsspec.AbstractFileSystem @@ -538,3 +538,34 @@ def templateize(strings, min_length=10, template_name="u"): else: template = {} return template, strings + + +def translate_refs_serializable(refs: dict): + """Translate a reference set to a serializable form, given that zarr + v3 memory stores store data in buffers by default. This modifies the + input dictionary in place, and returns a reference to it. + + It also fixes keys that have a leading slash, which is not appropriate for + zarr v3 keys + + Parameters + ---------- + refs: dict + The reference set + + Returns + ------- + dict + A serializable form of the reference set + """ + keys_to_remove = [] + new_keys = {} + for k, v in refs.items(): + if isinstance(v, zarr.core.buffer.cpu.Buffer): + key = k.removeprefix("/") + new_keys[key] = v.to_bytes() + keys_to_remove.append(k) + for k in keys_to_remove: + del refs[k] + refs.update(new_keys) + return refs \ No newline at end of file From 6e5741ca7d4fe25a9d37bbc3d72266e28c6695de Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 15 Oct 2024 09:48:05 -0400 Subject: [PATCH 16/40] Specify zarr version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5eb7c0c9..3c361a2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "numcodecs", "numpy", "ujson", - "zarr", + "zarr==3.0.0b0", ] [project.optional-dependencies] From c0316ace9b18455aece8d0910a33cd4791e083ce Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 09:31:10 -0400 Subject: [PATCH 17/40] Working remote hdf tests --- kerchunk/hdf.py | 2 +- kerchunk/tests/test_hdf.py | 22 +++++++++++----------- kerchunk/utils.py | 37 ++++++++++++++++++++++++------------- kerchunk/xarray_backend.py | 4 +++- pyproject.toml | 2 +- 5 files changed, 40 insertions(+), 27 deletions(-) diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py index 7cb4b5f6..1d4d0054 100644 --- a/kerchunk/hdf.py +++ b/kerchunk/hdf.py @@ -461,7 +461,7 @@ def _translator( if h5obj.attrs.get("_FillValue") is not None: fill = h5obj.attrs.get("_FillValue") fill = encode_fill_value( - h5obj.attrs.get("_FillValue"), dt or h5obj.dtype + fill, dt or h5obj.dtype ) adims = self._get_array_dims(h5obj) diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index 
8e2117cc..f600a127 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -1,7 +1,12 @@ +import asyncio import fsspec import json import os.path as osp +import zarr.core +import zarr.core.buffer +import zarr.core.group + import kerchunk.hdf import numpy as np import pytest @@ -11,33 +16,28 @@ from kerchunk.hdf import SingleHdf5ToZarr, has_visititems_links from kerchunk.combine import MultiZarrToZarr, drop from kerchunk.utils import refs_as_fs, refs_as_store -from kerchunk.zarr import fs_as_store +from kerchunk.utils import fs_as_store here = osp.dirname(__file__) def test_single(): """Test creating references for a single HDF file""" - # url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp" - url = "s3://noaa-nos-ofs-pds/ngofs2/netcdf/202410/ngofs2.t03z.20241001.2ds.f020.nc" + url = "s3://noaa-nwm-retro-v2.0-pds/full_physics/2017/201704010000.CHRTOUT_DOMAIN1.comp" so = dict(anon=True, default_fill_cache=False, default_cache_type="none") with fsspec.open(url, **so) as f: - h5chunks = SingleHdf5ToZarr(f, url, storage_options=so) + h5chunks = SingleHdf5ToZarr(f, url, storage_options=so, inline_threshold=1) test_dict = h5chunks.translate() with open("test_dict.json", "w") as f: json.dump(test_dict, f) - store = refs_as_store(test_dict) - - ds = xr.open_dataset( - store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) - ) + store = refs_as_store(test_dict, remote_options=dict(asynchronous=True, anon=True)) + ds = xr.open_zarr(store, zarr_format=2, consolidated=False) with fsspec.open(url, **so) as f: expected = xr.open_dataset(f, engine="h5netcdf") - xr.testing.assert_equal(ds.drop_vars("crs"), expected.drop_vars("crs")) @@ -164,7 +164,7 @@ def test_times(times_data): h5chunks = SingleHdf5ToZarr(f, url) test_dict = h5chunks.translate() - store = refs_as_store(test_dict) + store = refs_as_store(test_dict, remote_protocol="file") result = xr.open_dataset( store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) ) diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 71cee56a..8cc2f765 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -1,6 +1,7 @@ import base64 import copy import itertools +import fsspec.asyn from packaging.version import Version from typing import Any, cast import warnings @@ -24,12 +25,23 @@ def refs_as_fs(refs, remote_protocol=None, remote_options=None, **kwargs): return fs -def refs_as_store(refs, remote_protocol=None, remote_options=None): +def refs_as_store(refs, mode="r", remote_protocol=None, remote_options=None): """Convert a reference set to a zarr store""" + asynchronous = False + if is_zarr3(): + asynchronous = True + if remote_options is None: + remote_options = {"asynchronous": True} + else: + remote_options["asynchronous"] = True + fs = refs_as_fs( - refs, remote_protocol=remote_protocol, remote_options=remote_options + refs, + remote_protocol=remote_protocol, + remote_options=remote_options, + asynchronous=asynchronous, ) - return fs_as_store(fs) + return fs_as_store(fs, mode=mode) def is_zarr3(): @@ -40,18 +52,17 @@ def is_zarr3(): def dict_to_store(store_dict: dict): """Create an in memory zarr store backed by the given dictionary""" if is_zarr3(): - return zarr.storage.MemoryStore(mode="a", store_dict=store_dict) + return zarr.storage.MemoryStore(mode="w", store_dict=store_dict) else: return zarr.storage.KVStore(store_dict) -def fs_as_store(fs, mode="r", remote_protocol=None, remote_options=None): +def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, 
mode="r"): """Open the refs as a zarr store Parameters ---------- - refs: dict-like - the references to open + fs: fsspec.async.AsyncFileSystem mode: str Returns @@ -541,18 +552,18 @@ def templateize(strings, min_length=10, template_name="u"): def translate_refs_serializable(refs: dict): - """Translate a reference set to a serializable form, given that zarr - v3 memory stores store data in buffers by default. This modifies the + """Translate a reference set to a serializable form, given that zarr + v3 memory stores store data in buffers by default. This modifies the input dictionary in place, and returns a reference to it. - It also fixes keys that have a leading slash, which is not appropriate for - zarr v3 keys + It also fixes keys that have a leading slash, which is not appropriate for + zarr v3 keys Parameters ---------- refs: dict The reference set - + Returns ------- dict @@ -568,4 +579,4 @@ def translate_refs_serializable(refs: dict): for k in keys_to_remove: del refs[k] refs.update(new_keys) - return refs \ No newline at end of file + return refs diff --git a/kerchunk/xarray_backend.py b/kerchunk/xarray_backend.py index ca377f6d..dfbbafba 100644 --- a/kerchunk/xarray_backend.py +++ b/kerchunk/xarray_backend.py @@ -43,4 +43,6 @@ def open_reference_dataset( m = fsspec.get_mapper("reference://", fo=filename_or_obj, **storage_options) - return xr.open_dataset(m, engine="zarr", consolidated=False, **open_dataset_options) + return xr.open_dataset( + m, engine="zarr", zarr_format=2, consolidated=False, **open_dataset_options + ) diff --git a/pyproject.toml b/pyproject.toml index 3c361a2d..5eb7c0c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "numcodecs", "numpy", "ujson", - "zarr==3.0.0b0", + "zarr", ] [project.optional-dependencies] From 59bd36cafd33b9ec3c29ddf90e9041197e38dc30 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 10:03:25 -0400 Subject: [PATCH 18/40] Working grib impl --- kerchunk/grib2.py | 27 ++++++++------ kerchunk/tests/test_grib.py | 73 ++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py index 7d75786f..e4e64bf3 100644 --- a/kerchunk/grib2.py +++ b/kerchunk/grib2.py @@ -11,7 +11,7 @@ import xarray import numpy as np -from kerchunk.utils import class_factory, _encode_for_JSON +from kerchunk.utils import class_factory, _encode_for_JSON, dict_to_store, translate_refs_serializable from kerchunk.codecs import GRIBCodec from kerchunk.combine import MultiZarrToZarr, drop from kerchunk._grib_idx import parse_grib_idx, build_idx_grib_mapping, map_from_index @@ -71,13 +71,13 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr): shape = tuple(data.shape or ()) if nbytes < inline_threshold: logger.debug(f"Store {var} inline") - d = z.create_dataset( + d = z.create_array( name=var, shape=shape, chunks=shape, dtype=data.dtype, fill_value=attr.get("missingValue", None), - compressor=False, + compressor=None, ) if hasattr(data, "tobytes"): b = data.tobytes() @@ -91,15 +91,14 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr): store[f"{var}/0"] = b.decode("ascii") else: logger.debug(f"Store {var} reference") - d = z.create_dataset( + d = z.create_array( name=var, shape=shape, chunks=shape, dtype=data.dtype, fill_value=attr.get("missingValue", None), filters=[GRIBCodec(var=var, dtype=str(data.dtype))], - compressor=False, - overwrite=True, + compressor=None, ) store[f"{var}/" + 
".".join(["0"] * len(shape))] = ["{{u}}", offset, size] d.attrs.update(attr) @@ -153,7 +152,9 @@ def scan_grib( with fsspec.open(url, "rb", **storage_options) as f: logger.debug(f"File {url}") for offset, size, data in _split_file(f, skip=skip): - store = {} + store_dict = {} + store = dict_to_store(store_dict) + mid = eccodes.codes_new_from_message(data) m = cfgrib.cfmessage.CfMessage(mid) @@ -227,7 +228,7 @@ def scan_grib( varName = m["cfVarName"] if varName in ("undef", "unknown"): varName = m["shortName"] - _store_array(store, z, vals, varName, inline_threshold, offset, size, attrs) + _store_array(store_dict, z, vals, varName, inline_threshold, offset, size, attrs) if "typeOfLevel" in message_keys and "level" in message_keys: name = m["typeOfLevel"] coordinates.append(name) @@ -241,7 +242,7 @@ def scan_grib( attrs = {} attrs["_ARRAY_DIMENSIONS"] = [] _store_array( - store, z, data, name, inline_threshold, offset, size, attrs + store_dict, z, data, name, inline_threshold, offset, size, attrs ) dims = ( ["y", "x"] @@ -298,7 +299,7 @@ def scan_grib( dims = [coord] attrs = cfgrib.dataset.COORD_ATTRS[coord] _store_array( - store, + store_dict, z, x, coord, @@ -311,10 +312,11 @@ def scan_grib( if coordinates: z.attrs["coordinates"] = " ".join(coordinates) + translate_refs_serializable(store_dict) out.append( { "version": 1, - "refs": _encode_for_JSON(store), + "refs": _encode_for_JSON(store_dict), "templates": {"u": url}, } ) @@ -397,7 +399,8 @@ def grib_tree( filters = ["stepType", "typeOfLevel"] # TODO allow passing a LazyReferenceMapper as output? - zarr_store = {} + zarr_store_dict = {} + zarr_store = dict_to_store(zarr_store_dict) zroot = zarr.open_group(store=zarr_store, zarr_format=2) aggregations: Dict[str, List] = defaultdict(list) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index 9102529e..74f24a6d 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -6,7 +6,7 @@ import pandas as pd import pytest import xarray as xr -import datatree +#import datatree import zarr import ujson from kerchunk.grib2 import ( @@ -21,6 +21,7 @@ extract_dataset_chunk_index, extract_datatree_chunk_index, ) +from kerchunk.utils import refs_as_store eccodes_ver = tuple(int(i) for i in eccodes.__version__.split(".")) cfgrib = pytest.importorskip("cfgrib") @@ -68,17 +69,13 @@ def _fetch_first(url): def test_archives(tmpdir, url): grib = GribToZarr(url, storage_options={"anon": True}, skip=1) out = grib.translate()[0] - ours = xr.open_dataset( - "reference://", - engine="zarr", - backend_kwargs={ - "consolidated": False, - "storage_options": { - "fo": out, - "remote_protocol": "s3", - "remote_options": {"anon": True}, - }, - }, + + store = refs_as_store(out) + + ours = xr.open_zarr( + store, + zarr_format=2, + consolidated=False, ) data = _fetch_first(url) @@ -266,22 +263,22 @@ def test_hrrr_sfcf_grib_tree(): assert zg.u.instant.isobaricInhPa.time.shape == (1,) -def test_hrrr_sfcf_grib_datatree(): - fpath = os.path.join(here, "hrrr.wrfsfcf.subset.json") - with open(fpath, "rb") as fobj: - scanned_msgs = ujson.load(fobj) - merged = grib_tree(scanned_msgs) - dt = datatree.open_datatree( - fsspec.filesystem("reference", fo=merged).get_mapper(""), - engine="zarr", - consolidated=False, - ) - # Assert a few things... but if it loads we are mostly done. 
- np.testing.assert_array_equal( - dt.u.instant.heightAboveGround.step.values[:], - np.array([0, 3600 * 10**9], dtype="timedelta64[ns]"), - ) - assert dt.u.attrs == dict(name="U component of wind") +# def test_hrrr_sfcf_grib_datatree(): +# fpath = os.path.join(here, "hrrr.wrfsfcf.subset.json") +# with open(fpath, "rb") as fobj: +# scanned_msgs = ujson.load(fobj) +# merged = grib_tree(scanned_msgs) +# dt = datatree.open_datatree( +# fsspec.filesystem("reference", fo=merged).get_mapper(""), +# engine="zarr", +# consolidated=False, +# ) +# # Assert a few things... but if it loads we are mostly done. +# np.testing.assert_array_equal( +# dt.u.instant.heightAboveGround.step.values[:], +# np.array([0, 3600 * 10**9], dtype="timedelta64[ns]"), +# ) +# assert dt.u.attrs == dict(name="U component of wind") def test_parse_grib_idx_invalid_url(): @@ -345,17 +342,17 @@ def test_parse_grib_idx_content(idx_url, storage_options): assert idx_df.iloc[message_no]["length"] == output[message_no]["refs"][variable][2] -@pytest.fixture -def zarr_tree_and_datatree_instance(): - fn = os.path.join(here, "gfs.t00z.pgrb2.0p25.f006.test-limit-100") - tree_store = tree_store = grib_tree(scan_grib(fn)) - dt_instance = datatree.open_datatree( - fsspec.filesystem("reference", fo=tree_store).get_mapper(""), - engine="zarr", - consolidated=False, - ) +# @pytest.fixture +# def zarr_tree_and_datatree_instance(): +# fn = os.path.join(here, "gfs.t00z.pgrb2.0p25.f006.test-limit-100") +# tree_store = tree_store = grib_tree(scan_grib(fn)) +# dt_instance = datatree.open_datatree( +# fsspec.filesystem("reference", fo=tree_store).get_mapper(""), +# engine="zarr", +# consolidated=False, +# ) - return tree_store, dt_instance, fn +# return tree_store, dt_instance, fn def test_extract_dataset_chunk_index(zarr_tree_and_datatree_instance): From 187ced261feeda286fae65dbe8dda7e9b3da7c7c Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 10:04:22 -0400 Subject: [PATCH 19/40] Add back commented out code --- kerchunk/tests/test_grib.py | 56 ++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index 74f24a6d..f0e58f9d 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -6,7 +6,7 @@ import pandas as pd import pytest import xarray as xr -#import datatree +import datatree import zarr import ujson from kerchunk.grib2 import ( @@ -75,7 +75,7 @@ def test_archives(tmpdir, url): ours = xr.open_zarr( store, zarr_format=2, - consolidated=False, + consolidated=False ) data = _fetch_first(url) @@ -263,22 +263,22 @@ def test_hrrr_sfcf_grib_tree(): assert zg.u.instant.isobaricInhPa.time.shape == (1,) -# def test_hrrr_sfcf_grib_datatree(): -# fpath = os.path.join(here, "hrrr.wrfsfcf.subset.json") -# with open(fpath, "rb") as fobj: -# scanned_msgs = ujson.load(fobj) -# merged = grib_tree(scanned_msgs) -# dt = datatree.open_datatree( -# fsspec.filesystem("reference", fo=merged).get_mapper(""), -# engine="zarr", -# consolidated=False, -# ) -# # Assert a few things... but if it loads we are mostly done. 
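# Hedged sketch (URL, variable name and byte range assumed, not taken from
# this patch): each chunk key that _store_array writes maps to a
# [template, offset, length] triple, which fsspec's reference filesystem
# turns into a ranged read of the original GRIB file before GRIBCodec
# decodes the message bytes.
import fsspec

refs = {
    "version": 1,
    "templates": {"u": "s3://some-bucket/model-run.grib2"},  # assumed URL
    "refs": {
        ".zgroup": '{"zarr_format": 2}',
        "t2m/0.0": ["{{u}}", 0, 4096],  # 4096 bytes starting at offset 0
    },
}
fs = fsspec.filesystem(
    "reference", fo=refs, remote_protocol="s3", remote_options={"anon": True}
)
raw = fs.cat("t2m/0.0")  # raw GRIB message bytes for that one chunk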
-# np.testing.assert_array_equal( -# dt.u.instant.heightAboveGround.step.values[:], -# np.array([0, 3600 * 10**9], dtype="timedelta64[ns]"), -# ) -# assert dt.u.attrs == dict(name="U component of wind") +def test_hrrr_sfcf_grib_datatree(): + fpath = os.path.join(here, "hrrr.wrfsfcf.subset.json") + with open(fpath, "rb") as fobj: + scanned_msgs = ujson.load(fobj) + merged = grib_tree(scanned_msgs) + dt = datatree.open_datatree( + fsspec.filesystem("reference", fo=merged).get_mapper(""), + engine="zarr", + consolidated=False, + ) + # Assert a few things... but if it loads we are mostly done. + np.testing.assert_array_equal( + dt.u.instant.heightAboveGround.step.values[:], + np.array([0, 3600 * 10**9], dtype="timedelta64[ns]"), + ) + assert dt.u.attrs == dict(name="U component of wind") def test_parse_grib_idx_invalid_url(): @@ -342,17 +342,17 @@ def test_parse_grib_idx_content(idx_url, storage_options): assert idx_df.iloc[message_no]["length"] == output[message_no]["refs"][variable][2] -# @pytest.fixture -# def zarr_tree_and_datatree_instance(): -# fn = os.path.join(here, "gfs.t00z.pgrb2.0p25.f006.test-limit-100") -# tree_store = tree_store = grib_tree(scan_grib(fn)) -# dt_instance = datatree.open_datatree( -# fsspec.filesystem("reference", fo=tree_store).get_mapper(""), -# engine="zarr", -# consolidated=False, -# ) +@pytest.fixture +def zarr_tree_and_datatree_instance(): + fn = os.path.join(here, "gfs.t00z.pgrb2.0p25.f006.test-limit-100") + tree_store = tree_store = grib_tree(scan_grib(fn)) + dt_instance = datatree.open_datatree( + fsspec.filesystem("reference", fo=tree_store).get_mapper(""), + engine="zarr", + consolidated=False, + ) -# return tree_store, dt_instance, fn + return tree_store, dt_instance, fn def test_extract_dataset_chunk_index(zarr_tree_and_datatree_instance): From 690ed21922cd4255eb39a795674bf38372c87427 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 11:28:58 -0400 Subject: [PATCH 20/40] Make grib codec a compressor since its bytes to array --- kerchunk/grib2.py | 4 +-- kerchunk/tests/test_grib.py | 54 ++++++++++++++++++------------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py index e4e64bf3..eb796e2e 100644 --- a/kerchunk/grib2.py +++ b/kerchunk/grib2.py @@ -97,8 +97,8 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr): chunks=shape, dtype=data.dtype, fill_value=attr.get("missingValue", None), - filters=[GRIBCodec(var=var, dtype=str(data.dtype))], - compressor=None, + filters=[], + compressor=GRIBCodec(var=var, dtype=str(data.dtype)), ) store[f"{var}/" + ".".join(["0"] * len(shape))] = ["{{u}}", offset, size] d.attrs.update(attr) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index f0e58f9d..7d9cf32b 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -6,7 +6,7 @@ import pandas as pd import pytest import xarray as xr -import datatree +#import datatree import zarr import ujson from kerchunk.grib2 import ( @@ -263,22 +263,22 @@ def test_hrrr_sfcf_grib_tree(): assert zg.u.instant.isobaricInhPa.time.shape == (1,) -def test_hrrr_sfcf_grib_datatree(): - fpath = os.path.join(here, "hrrr.wrfsfcf.subset.json") - with open(fpath, "rb") as fobj: - scanned_msgs = ujson.load(fobj) - merged = grib_tree(scanned_msgs) - dt = datatree.open_datatree( - fsspec.filesystem("reference", fo=merged).get_mapper(""), - engine="zarr", - consolidated=False, - ) - # Assert a few things... but if it loads we are mostly done. 
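# Hedged sketch of what this commit toggles in the generated zarr v2
# metadata (the exact numcodecs config and the array sizes are assumed): a
# bytes-to-array codec can sit either in the "filters" list or in the
# "compressor" slot of .zarray, and GRIBCodec.decode() receives the raw
# message bytes in both layouts. Patch 21 below switches back to the
# filter form.
as_filter = {
    "shape": [1059, 1799], "chunks": [1059, 1799], "dtype": "<f4",
    "compressor": None,
    "filters": [{"id": "grib", "var": "t2m", "dtype": "float32"}],
}
as_compressor = {
    "shape": [1059, 1799], "chunks": [1059, 1799], "dtype": "<f4",
    "compressor": {"id": "grib", "var": "t2m", "dtype": "float32"},
    "filters": None,
}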
- np.testing.assert_array_equal( - dt.u.instant.heightAboveGround.step.values[:], - np.array([0, 3600 * 10**9], dtype="timedelta64[ns]"), - ) - assert dt.u.attrs == dict(name="U component of wind") +# def test_hrrr_sfcf_grib_datatree(): +# fpath = os.path.join(here, "hrrr.wrfsfcf.subset.json") +# with open(fpath, "rb") as fobj: +# scanned_msgs = ujson.load(fobj) +# merged = grib_tree(scanned_msgs) +# dt = datatree.open_datatree( +# fsspec.filesystem("reference", fo=merged).get_mapper(""), +# engine="zarr", +# consolidated=False, +# ) +# # Assert a few things... but if it loads we are mostly done. +# np.testing.assert_array_equal( +# dt.u.instant.heightAboveGround.step.values[:], +# np.array([0, 3600 * 10**9], dtype="timedelta64[ns]"), +# ) +# assert dt.u.attrs == dict(name="U component of wind") def test_parse_grib_idx_invalid_url(): @@ -342,17 +342,17 @@ def test_parse_grib_idx_content(idx_url, storage_options): assert idx_df.iloc[message_no]["length"] == output[message_no]["refs"][variable][2] -@pytest.fixture -def zarr_tree_and_datatree_instance(): - fn = os.path.join(here, "gfs.t00z.pgrb2.0p25.f006.test-limit-100") - tree_store = tree_store = grib_tree(scan_grib(fn)) - dt_instance = datatree.open_datatree( - fsspec.filesystem("reference", fo=tree_store).get_mapper(""), - engine="zarr", - consolidated=False, - ) +# @pytest.fixture +# def zarr_tree_and_datatree_instance(): +# fn = os.path.join(here, "gfs.t00z.pgrb2.0p25.f006.test-limit-100") +# tree_store = tree_store = grib_tree(scan_grib(fn)) +# dt_instance = datatree.open_datatree( +# fsspec.filesystem("reference", fo=tree_store).get_mapper(""), +# engine="zarr", +# consolidated=False, +# ) - return tree_store, dt_instance, fn +# return tree_store, dt_instance, fn def test_extract_dataset_chunk_index(zarr_tree_and_datatree_instance): From 5019b154903199514a0484f71f625971879defe6 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 23 Oct 2024 11:36:59 -0400 Subject: [PATCH 21/40] Switch back --- kerchunk/grib2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py index eb796e2e..e4e64bf3 100644 --- a/kerchunk/grib2.py +++ b/kerchunk/grib2.py @@ -97,8 +97,8 @@ def _store_array(store, z, data, var, inline_threshold, offset, size, attr): chunks=shape, dtype=data.dtype, fill_value=attr.get("missingValue", None), - filters=[], - compressor=GRIBCodec(var=var, dtype=str(data.dtype)), + filters=[GRIBCodec(var=var, dtype=str(data.dtype))], + compressor=None, ) store[f"{var}/" + ".".join(["0"] * len(shape))] = ["{{u}}", offset, size] d.attrs.update(attr) From d96cf469c3beca0ac28df23d2f96ec831d169069 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Sat, 26 Oct 2024 16:42:03 -0400 Subject: [PATCH 22/40] Add first pass at grib zarr 3 codec --- kerchunk/codecs.py | 87 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/kerchunk/codecs.py b/kerchunk/codecs.py index 852076ea..4804423e 100644 --- a/kerchunk/codecs.py +++ b/kerchunk/codecs.py @@ -1,11 +1,22 @@ import ast +from dataclasses import dataclass import io +from typing import TYPE_CHECKING import numcodecs from numcodecs.abc import Codec import numpy as np import threading import zlib +from zarr.abc.codec import ArrayBytesCodec +from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer +from zarr.core.common import JSON, parse_enum, parse_named_configuration +from zarr.registry import register_codec + +if TYPE_CHECKING: + from typing import Self + + from 
zarr.core.array_spec import ArraySpec class FillStringsCodec(Codec): @@ -115,6 +126,78 @@ def decode(self, buf, out=None): numcodecs.register_codec(GRIBCodec, "grib") +@dataclass(frozen=True) +class GRIBZarrCodec(ArrayBytesCodec): + eclock = threading.RLock() + + var: str + dtype: np.dtype + + def __init__(self, *, var: str, dtype: np.dtype) -> None: + object.__setattr__(self, "var", var) + object.__setattr__(self, "dtype", dtype) + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + _, configuration_parsed = parse_named_configuration( + data, "bytes", require_configuration=True + ) + configuration_parsed = configuration_parsed or {} + return cls(**configuration_parsed) # type: ignore[arg-type] + + def to_dict(self) -> dict[str, JSON]: + if self.endian is None: + return {"name": "grib"} + else: + return { + "name": "grib", + "configuration": {"var": self.var, "dtype": self.dtype}, + } + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: ArraySpec, + ) -> NDBuffer: + assert isinstance(chunk_bytes, Buffer) + import eccodes + + if self.var in ["latitude", "longitude"]: + var = self.var + "s" + dt = self.dtype or "float64" + else: + var = "values" + dt = self.dtype or "float32" + + with self.eclock: + mid = eccodes.codes_new_from_message(chunk_bytes.to_bytes()) + try: + data = eccodes.codes_get_array(mid, var) + missingValue = eccodes.codes_get_string(mid, "missingValue") + if var == "values" and missingValue: + data[data == float(missingValue)] = np.nan + return data.astype(dt, copy=False) + + finally: + eccodes.codes_release(mid) + + async def _encode_single( + self, + chunk_array: NDBuffer, + chunk_spec: ArraySpec, + ) -> Buffer | None: + # This is a one way codec + raise NotImplementedError + + def compute_encoded_size( + self, input_byte_length: int, _chunk_spec: ArraySpec + ) -> int: + raise NotImplementedError + + +register_codec("grib", GRIBZarrCodec) + + class AsciiTableCodec(numcodecs.abc.Codec): """Decodes ASCII-TABLE extensions in FITS files""" @@ -166,7 +249,6 @@ def decode(self, buf, out=None): arr2 = np.empty((self.nrow,), dtype=dt_out) heap = buf[arr.nbytes :] for name in dt_out.names: - if dt_out[name] == "O": dt = np.dtype(self.ftypes[self.types[name]]) counts = arr[name][:, 0] @@ -244,8 +326,7 @@ def encode(self, buf): class ZlibCodec(Codec): codec_id = "zlib" - def __init__(self): - ... + def __init__(self): ... 
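# Hedged sketch (assumes the zarr-python 3.x registry exposes
# get_codec_class; only register_codec is used by the patch itself): the
# v3 codec pipeline looks codecs up by name, so registering "grib" above
# is what lets v3 array metadata resolve to GRIBZarrCodec.
import numpy as np
from zarr.registry import get_codec_class

cls = get_codec_class("grib")  # -> the GRIBZarrCodec registered above
codec = cls(var="t2m", dtype=np.dtype("float32"))
# _decode_single() hands a chunk's raw GRIB message to eccodes and returns
# the decoded array; _encode_single() deliberately raises, since this
# codec is one-way (read-only references, no re-encoding to GRIB).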
def decode(self, data, out=None): if out: From cbcb7208576277351fd57e8746b57698e1b2899c Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 29 Oct 2024 13:30:18 -0700 Subject: [PATCH 23/40] Fix typing --- kerchunk/codecs.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kerchunk/codecs.py b/kerchunk/codecs.py index 4804423e..46b19072 100644 --- a/kerchunk/codecs.py +++ b/kerchunk/codecs.py @@ -1,23 +1,19 @@ import ast from dataclasses import dataclass import io -from typing import TYPE_CHECKING +from typing import Self, TYPE_CHECKING import numcodecs from numcodecs.abc import Codec import numpy as np import threading import zlib +from zarr.core.array_spec import ArraySpec from zarr.abc.codec import ArrayBytesCodec from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration from zarr.registry import register_codec -if TYPE_CHECKING: - from typing import Self - - from zarr.core.array_spec import ArraySpec - class FillStringsCodec(Codec): """Sets fixed-length string fields to empty From b88655f3c0d9789e09dee99afdcf245a652d9b73 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerman Date: Wed, 6 Nov 2024 13:39:53 -0600 Subject: [PATCH 24/40] Fix some broken tests; use async filesystem wrapper --- kerchunk/tests/test_combine.py | 10 ++++++---- kerchunk/utils.py | 3 +++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py index 868a39ff..0cfb9505 100644 --- a/kerchunk/tests/test_combine.py +++ b/kerchunk/tests/test_combine.py @@ -134,16 +134,18 @@ # simple time arrays - xarray can't make these! m = fs.get_mapper("time1.zarr") z = zarr.open(m, mode="w", zarr_format=2) -ar = z.create_dataset("time", data=np.array([1], dtype="M8[s]")) +time1_array = np.array([1], dtype="M8[s]") +ar = z.create_array("time", data=time1_array, shape=time1_array.shape) ar.attrs.update({"_ARRAY_DIMENSIONS": ["time"]}) -ar = z.create_dataset("data", data=arr) +ar = z.create_array("data", data=arr, shape=arr.shape) ar.attrs.update({"_ARRAY_DIMENSIONS": ["time", "x", "y"]}) m = fs.get_mapper("time2.zarr") z = zarr.open(m, mode="w", zarr_format=2) -ar = z.create_dataset("time", data=np.array([2], dtype="M8[s]")) +time2_array = np.array([2], dtype="M8[s]") +ar = z.create_array("time", data=time2_array, shape=time2_array.shape) ar.attrs.update({"_ARRAY_DIMENSIONS": ["time"]}) -ar = z.create_dataset("data", data=arr) +ar = z.create_array("data", data=arr, shape=arr.shape) ar.attrs.update({"_ARRAY_DIMENSIONS": ["time", "x", "y"]}) diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 8cc2f765..5916ebef 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -9,6 +9,7 @@ import ujson import fsspec +from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper import numpy as np import zarr @@ -70,6 +71,8 @@ def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, mode="r"): zarr.storage.Store or zarr.storage.Mapper, fsspec.AbstractFileSystem """ if is_zarr3(): + if not fs.async_impl: + fs = AsyncFileSystemWrapper(fs) return zarr.storage.RemoteStore(fs, mode=mode) else: return fs.get_mapper() From 73eaf33a80801d86afc2f289a33ee56de101f423 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerman Date: Tue, 19 Nov 2024 18:02:01 -0600 Subject: [PATCH 25/40] Implement zarr3 compatibility for grib --- kerchunk/combine.py | 38 ++++++++------ kerchunk/grib2.py | 14 ++--- kerchunk/tests/test_grib.py | 100 ++++++++++++++++++------------------ kerchunk/utils.py | 15 +++--- 4 
files changed, 88 insertions(+), 79 deletions(-) diff --git a/kerchunk/combine.py b/kerchunk/combine.py index b02fa395..777853d2 100644 --- a/kerchunk/combine.py +++ b/kerchunk/combine.py @@ -11,7 +11,7 @@ import ujson import zarr -from kerchunk.utils import consolidate +from kerchunk.utils import consolidate, fs_as_store, translate_refs_serializable logger = logging.getLogger("kerchunk.combine") @@ -199,6 +199,7 @@ def append( remote_protocol=remote_protocol, remote_options=remote_options, target_options=target_options, + asynchronous=True ) ds = xr.open_dataset( fs.get_mapper(), engine="zarr", backend_kwargs={"consolidated": False} @@ -264,7 +265,7 @@ def fss(self): self._paths = [] for of in fsspec.open_files(self.path, **self.target_options): self._paths.append(of.full_name) - fs = fsspec.core.url_to_fs(self.path[0], **self.target_options)[0] + fs = fsspec.core.url_to_fs(self.path[0], asynchronous=True, **self.target_options)[0] try: # JSON path fo_list = fs.cat(self.path) @@ -360,7 +361,8 @@ def first_pass(self): fs._dircache_from_items() logger.debug("First pass: %s", i) - z = zarr.open_group(fs.get_mapper(""), zarr_format=2) + z_store = fs_as_store(fs, read_only=False) + z = zarr.open_group(z_store, zarr_format=2) for var in self.concat_dims: value = self._get_value(i, z, var, fn=self._paths[i]) if isinstance(value, np.ndarray): @@ -386,10 +388,10 @@ def store_coords(self): Write coordinate arrays into the output """ kv = {} - store = zarr.storage.KVStore(kv) - group = zarr.open(store, zarr_format=2) - m = self.fss[0].get_mapper("") - z = zarr.open(m) + store = zarr.storage.MemoryStore(kv) + group = zarr.open_group(store, zarr_format=2) + m = fs_as_store(self.fss[0], read_only=False) + z = zarr.open(m, zarr_format=2) for k, v in self.coos.items(): if k == "var": # The names of the variables to write in the second pass, not a coordinate @@ -420,10 +422,11 @@ def store_coords(self): elif k in z: # Fall back to existing fill value kw["fill_value"] = z[k].fill_value - arr = group.create_dataset( + arr = group.create_array( name=k, data=data, - overwrite=True, + shape=data.shape, + exists_ok=True, compressor=compression, dtype=self.coo_dtypes.get(k, data.dtype), **kw, @@ -443,8 +446,8 @@ def store_coords(self): logger.debug("Written coordinates") for fn in [".zgroup", ".zattrs"]: # top-level group attributes from first input - if fn in m: - self.out[fn] = ujson.dumps(ujson.loads(m[fn])) + if m.fs.exists(fn): + self.out[fn] = ujson.dumps(ujson.loads(m.fs.cat(fn))) logger.debug("Written global metadata") self.done.add(2) @@ -460,7 +463,7 @@ def second_pass(self): for i, fs in enumerate(self.fss): to_download = {} - m = fs.get_mapper("") + m = fs_as_store(fs, read_only=False) z = zarr.open(m, zarr_format=2) if no_deps is None: @@ -491,9 +494,9 @@ def second_pass(self): if f"{v}/.zgroup" in fns: # recurse into groups - copy meta, add to dirs to process and don't look # for references in this dir - self.out[f"{v}/.zgroup"] = m[f"{v}/.zgroup"] + self.out[f"{v}/.zgroup"] = m.fs.cat(f"{v}/.zgroup") if f"{v}/.zattrs" in fns: - self.out[f"{v}/.zattrs"] = m[f"{v}/.zattrs"] + self.out[f"{v}/.zattrs"] = m.fs.cat(f"{v}/.zattrs") dirs.extend([f for f in fns if not f.startswith(f"{v}/.z")]) continue if v in self.identical_dims: @@ -505,7 +508,7 @@ def second_pass(self): continue logger.debug("Second pass: %s, %s", i, v) - zarray = ujson.loads(m[f"{v}/.zarray"]) + zarray = ujson.loads(m.fs.cat(f"{v}/.zarray")) if v not in chunk_sizes: chunk_sizes[v] = zarray["chunks"] elif chunk_sizes[v] != 
zarray["chunks"]: @@ -516,7 +519,10 @@ def second_pass(self): chunks so far: {zarray["chunks"]}""" ) chunks = chunk_sizes[v] - zattrs = ujson.loads(m.get(f"{v}/.zattrs", "{}")) + if m.fs.exists(f"{v}/.zattrs"): + zattrs = ujson.loads(m.fs.cat(f"{v}/.zattrs")) + else: + zattrs = ujson.loads({}) coords = zattrs.get("_ARRAY_DIMENSIONS", []) if zarray["shape"] and not coords: coords = list("ikjlm")[: len(zarray["shape"])] diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py index e4e64bf3..686a71a0 100644 --- a/kerchunk/grib2.py +++ b/kerchunk/grib2.py @@ -11,7 +11,7 @@ import xarray import numpy as np -from kerchunk.utils import class_factory, _encode_for_JSON, dict_to_store, translate_refs_serializable +from kerchunk.utils import class_factory, _encode_for_JSON, dict_to_store, fs_as_store, translate_refs_serializable from kerchunk.codecs import GRIBCodec from kerchunk.combine import MultiZarrToZarr, drop from kerchunk._grib_idx import parse_grib_idx, build_idx_grib_mapping, map_from_index @@ -520,17 +520,18 @@ def grib_tree( for key, value in group["refs"].items(): if key not in [".zattrs", ".zgroup"]: - zarr_store[f"{path}/{key}"] = value + zarr_store._store_dict[f"{path}/{key}"] = value # Force all stored values to decode as string, not bytes. String should be correct. # ujson will reject bytes values by default. # Using 'reject_bytes=False' one write would fail an equality check on read. - zarr_store = { + zarr_dict = { key: (val.decode() if isinstance(val, bytes) else val) - for key, val in zarr_store.items() + for key, val in zarr_store._store_dict.items() } # TODO handle other kerchunk reference spec versions? - result = dict(refs=zarr_store, version=1) + translate_refs_serializable(zarr_dict) + result = dict(refs=zarr_dict, version=1) return result @@ -571,7 +572,8 @@ def correct_hrrr_subhf_step(group: Dict) -> Dict: group["refs"][".zattrs"] = ujson.dumps(attrs) fo = fsspec.filesystem("reference", fo=group, mode="r") - xd = xarray.open_dataset(fo.get_mapper(), engine="zarr", consolidated=False) + fstore = fs_as_store(fo, read_only=True) + xd = xarray.open_dataset(fstore, engine="zarr", consolidated=False) correct_step = xd.valid_time.values - xd.time.values diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index 7d9cf32b..9bc90b71 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -21,7 +21,7 @@ extract_dataset_chunk_index, extract_datatree_chunk_index, ) -from kerchunk.utils import refs_as_store +from kerchunk.utils import fs_as_store, refs_as_store eccodes_ver = tuple(int(i) for i in eccodes.__version__.split(".")) cfgrib = pytest.importorskip("cfgrib") @@ -70,7 +70,7 @@ def test_archives(tmpdir, url): grib = GribToZarr(url, storage_options={"anon": True}, skip=1) out = grib.translate()[0] - store = refs_as_store(out) + store = refs_as_store(out, remote_options={"anon": True}) ours = xr.open_zarr( store, @@ -116,7 +116,8 @@ def test_grib_tree(): corrected_msg_groups = [correct_hrrr_subhf_step(msg) for msg in scanned_msg_groups] result = grib_tree(corrected_msg_groups) fs = fsspec.filesystem("reference", fo=result) - zg = zarr.open_group(fs.get_mapper(""), zarr_format=2) + store = fs_as_store(fs) + zg = zarr.open_group(store, mode="r", zarr_format=2) assert isinstance(zg["refc/instant/atmosphere/refc"], zarr.Array) assert isinstance(zg["vbdsf/avg/surface/vbdsf"], zarr.Array) assert set(zg["vbdsf/avg/surface"].attrs["coordinates"].split()) == set( @@ -126,7 +127,7 @@ def test_grib_tree(): "atmosphere latitude longitude step time 
valid_time".split() ) # Assert that the fill value is set correctly - assert zg.refc.instant.atmosphere.step.fill_value is np.nan + assert np.isnan(zg['refc/instant/atmosphere/step'].fill_value) # The following two tests use json fixture data generated from calling scan grib @@ -144,14 +145,14 @@ def test_correct_hrrr_subhf_group_step(): scanned_msgs = ujson.load(fobj) original_zg = [ - zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""), zarr_format=2) + zarr.open_group(fs_as_store(fsspec.filesystem("reference", fo=val)), mode="r", zarr_format=2) for val in scanned_msgs ] corrected_msgs = [correct_hrrr_subhf_step(msg) for msg in scanned_msgs] corrected_zg = [ - zarr.open_group(fsspec.filesystem("reference", fo=val).get_mapper(""), zarr_format=2) + zarr.open_group(fs_as_store(fsspec.filesystem("reference", fo=val)), mode="r", zarr_format=2) for val in corrected_msgs ] @@ -160,10 +161,10 @@ def test_correct_hrrr_subhf_group_step(): assert not all(["step" in zg.array_keys() for zg in original_zg]) # The step values are corrected to floating point hour - assert all([zg.step[()] <= 1.0 for zg in corrected_zg]) + assert all([zg["step"][()] <= 1.0 for zg in corrected_zg]) # The original seems to have values in minutes for some step variables! assert not all( - [zg.step[()] <= 1.0 for zg in original_zg if "step" in zg.array_keys()] + [zg["step"][()] <= 1.0 for zg in original_zg if "step" in zg.array_keys()] ) @@ -174,36 +175,32 @@ def test_hrrr_subhf_corrected_grib_tree(): corrected_msgs = [correct_hrrr_subhf_step(msg) for msg in scanned_msgs] merged = grib_tree(corrected_msgs) - zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""), zarr_format=2) + z_fs = fsspec.filesystem("reference", fo=merged, asynchronous=True) + zstore = fs_as_store(z_fs) + zg = zarr.open_group(zstore, mode="r", zarr_format=2) # Check the values and shape of the time coordinates - assert zg.u.instant.heightAboveGround.step[:].tolist() == [ + assert zg['u/instant/heightAboveGround/step'][:].tolist() == [ 0.0, 0.25, 0.5, 0.75, 1.0, ] - assert zg.u.instant.heightAboveGround.step.shape == (5,) - - assert zg.u.instant.heightAboveGround.valid_time[:].tolist() == [ + assert zg['u/instant/heightAboveGround/step'].shape == (5,) + assert zg['u/instant/heightAboveGround/valid_time'][:].tolist() == [ [1695862800, 1695863700, 1695864600, 1695865500, 1695866400] ] - assert zg.u.instant.heightAboveGround.valid_time.shape == (1, 5) - - assert zg.u.instant.heightAboveGround.time[:].tolist() == [1695862800] - assert zg.u.instant.heightAboveGround.time.shape == (1,) - - assert zg.dswrf.avg.surface.step[:].tolist() == [0.0, 0.25, 0.5, 0.75, 1.0] - assert zg.dswrf.avg.surface.step.shape == (5,) - - assert zg.dswrf.avg.surface.valid_time[:].tolist() == [ + assert zg['u/instant/heightAboveGround/valid_time'].shape == (1, 5) + assert zg['u/instant/heightAboveGround/time'][:].tolist() == [1695862800] + assert zg['u/instant/heightAboveGround/time'].shape == (1,) + assert zg['dswrf/avg/surface/step'][:].tolist() == [0.0, 0.25, 0.5, 0.75, 1.0] + assert zg['dswrf/avg/surface/step'].shape == (5,) + assert zg['dswrf/avg/surface/valid_time'][:].tolist() == [ [1695862800, 1695863700, 1695864600, 1695865500, 1695866400] ] - assert zg.dswrf.avg.surface.valid_time.shape == (1, 5) - - assert zg.dswrf.avg.surface.time[:].tolist() == [1695862800] - assert zg.dswrf.avg.surface.time.shape == (1,) - + assert zg['dswrf/avg/surface/valid_time'].shape == (1, 5) + assert zg['dswrf/avg/surface/time'][:].tolist() == [1695862800] 
+ assert zg['dswrf/avg/surface/time'].shape == (1,) # The following two test use json fixture data generated from calling scan grib # scan_grib("testdata/hrrr.t01z.wrfsfcf00.grib2") @@ -217,24 +214,22 @@ def test_hrrr_sfcf_grib_tree(): with open(fpath, "rb") as fobj: scanned_msgs = ujson.load(fobj) merged = grib_tree(scanned_msgs) - zg = zarr.open_group(fsspec.filesystem("reference", fo=merged).get_mapper(""), zarr_format=2) + store = fs_as_store(fsspec.filesystem("reference", fo=merged)) + zg = zarr.open_group(store, mode="r", zarr_format=2) # Check the heightAboveGround level shape of the time coordinates - assert zg.u.instant.heightAboveGround.heightAboveGround[()] == 80.0 - assert zg.u.instant.heightAboveGround.heightAboveGround.shape == () - - assert zg.u.instant.heightAboveGround.step[:].tolist() == [0.0, 1.0] - assert zg.u.instant.heightAboveGround.step.shape == (2,) - - assert zg.u.instant.heightAboveGround.valid_time[:].tolist() == [ + assert zg['u/instant/heightAboveGround/heightAboveGround'][()] == 80.0 + assert zg['u/instant/heightAboveGround/heightAboveGround'].shape == () + assert zg['u/instant/heightAboveGround/step'][:].tolist() == [0.0, 1.0] + assert zg['u/instant/heightAboveGround/step'].shape == (2,) + assert zg['u/instant/heightAboveGround/valid_time'][:].tolist() == [ [1695862800, 1695866400] ] - assert zg.u.instant.heightAboveGround.valid_time.shape == (1, 2) - - assert zg.u.instant.heightAboveGround.time[:].tolist() == [1695862800] - assert zg.u.instant.heightAboveGround.time.shape == (1,) + assert zg['u/instant/heightAboveGround/valid_time'].shape == (1, 2) + assert zg['u/instant/heightAboveGround/time'][:].tolist() == [1695862800] + assert zg['u/instant/heightAboveGround/time'].shape == (1,) # Check the isobaricInhPa level shape and time coordinates - assert zg.u.instant.isobaricInhPa.isobaricInhPa[:].tolist() == [ + assert zg['u/instant/isobaricInhPa/isobaricInhPa'][:].tolist() == [ 250.0, 300.0, 500.0, @@ -243,10 +238,9 @@ def test_hrrr_sfcf_grib_tree(): 925.0, 1000.0, ] - assert zg.u.instant.isobaricInhPa.isobaricInhPa.shape == (7,) - - assert zg.u.instant.isobaricInhPa.step[:].tolist() == [0.0, 1.0] - assert zg.u.instant.isobaricInhPa.step.shape == (2,) + assert zg['u/instant/isobaricInhPa/isobaricInhPa'].shape == (7,) + assert zg['u/instant/isobaricInhPa/step'][:].tolist() == [0.0, 1.0] + assert zg['u/instant/isobaricInhPa/step'].shape == (2,) # Valid time values get exploded by isobaricInhPa aggregation # Is this a feature or a bug? @@ -256,11 +250,11 @@ def test_hrrr_sfcf_grib_tree(): [1695866400 for _ in range(7)], ] ] - assert zg.u.instant.isobaricInhPa.valid_time[:].tolist() == expected_valid_times - assert zg.u.instant.isobaricInhPa.valid_time.shape == (1, 2, 7) + assert zg['u/instant/isobaricInhPa/valid_time'][:].tolist() == expected_valid_times + assert zg['u/instant/isobaricInhPa/valid_time'].shape == (1, 2, 7) - assert zg.u.instant.isobaricInhPa.time[:].tolist() == [1695862800] - assert zg.u.instant.isobaricInhPa.time.shape == (1,) + assert zg['u/instant/isobaricInhPa/time'][:].tolist() == [1695862800] + assert zg['u/instant/isobaricInhPa/time'].shape == (1,) # def test_hrrr_sfcf_grib_datatree(): @@ -290,11 +284,14 @@ def test_parse_grib_idx_invalid_url(): def test_parse_grib_idx_no_file(): - with pytest.raises(FileNotFoundError): + # How did this ever work? 
403s are returned for anonymous calls to non-existent + # files iirc as a security measure to obscure results/avoid tests for existence + #with pytest.raises(FileNotFoundError): + with pytest.raises(PermissionError): # the url is spelled wrong parse_grib_idx( "s3://noaahrrr-bdp-pds/hrrr.20220804/conus/hrrr.t01z.wrfsfcf01.grib2", - storage_options=dict(anon=True), + storage_options={"anon": True}, ) @@ -355,6 +352,7 @@ def test_parse_grib_idx_content(idx_url, storage_options): # return tree_store, dt_instance, fn +@pytest.mark.skip(reason="datatree support should be updated to use xarray.Datatree") def test_extract_dataset_chunk_index(zarr_tree_and_datatree_instance): tree_store, dt_instance, fn = zarr_tree_and_datatree_instance @@ -385,6 +383,7 @@ def test_extract_dataset_chunk_index(zarr_tree_and_datatree_instance): ) +@pytest.mark.skip(reason="datatree support should be updated to use xarray.Datatree") def test_extract_datatree_chunk_index(zarr_tree_and_datatree_instance): tree_store, dt_instance, fn = zarr_tree_and_datatree_instance @@ -438,6 +437,7 @@ def test_extract_datatree_chunk_index(zarr_tree_and_datatree_instance): ).all() +@pytest.mark.skip(reason="datatree support should be updated to use xarray.Datatree") def test_extract_methods_grib_parameter(zarr_tree_and_datatree_instance): tree_store, dt_instance, _ = zarr_tree_and_datatree_instance diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 5916ebef..b918aa1d 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -22,11 +22,12 @@ def refs_as_fs(refs, remote_protocol=None, remote_options=None, **kwargs): remote_protocol=remote_protocol, remote_options=remote_options, **kwargs, + asynchronous=True ) return fs -def refs_as_store(refs, mode="r", remote_protocol=None, remote_options=None): +def refs_as_store(refs, read_only=True, remote_protocol=None, remote_options=None): """Convert a reference set to a zarr store""" asynchronous = False if is_zarr3(): @@ -39,10 +40,9 @@ def refs_as_store(refs, mode="r", remote_protocol=None, remote_options=None): fs = refs_as_fs( refs, remote_protocol=remote_protocol, - remote_options=remote_options, - asynchronous=asynchronous, + remote_options=remote_options ) - return fs_as_store(fs, mode=mode) + return fs_as_store(fs, read_only=True) def is_zarr3(): @@ -53,12 +53,12 @@ def is_zarr3(): def dict_to_store(store_dict: dict): """Create an in memory zarr store backed by the given dictionary""" if is_zarr3(): - return zarr.storage.MemoryStore(mode="w", store_dict=store_dict) + return zarr.storage.MemoryStore(read_only=False, store_dict=store_dict) else: return zarr.storage.KVStore(store_dict) -def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, mode="r"): +def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, read_only=True): """Open the refs as a zarr store Parameters @@ -73,7 +73,8 @@ def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, mode="r"): if is_zarr3(): if not fs.async_impl: fs = AsyncFileSystemWrapper(fs) - return zarr.storage.RemoteStore(fs, mode=mode) + fs.asynchronous = True + return zarr.storage.RemoteStore(fs, read_only=read_only) else: return fs.get_mapper() From 37571995c70573613ead3c8cf0f1c14c54640f43 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerman Date: Thu, 21 Nov 2024 16:24:05 -0600 Subject: [PATCH 26/40] Use zarr3 stores directly; avoid use of internal fs --- kerchunk/combine.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/kerchunk/combine.py b/kerchunk/combine.py index 777853d2..841b9e8a 100644 --- a/kerchunk/combine.py 
+++ b/kerchunk/combine.py
@@ -1,3 +1,4 @@
+import asyncio
 import collections.abc
 import logging
 import re
@@ -10,6 +11,7 @@
 import numcodecs
 import ujson
 import zarr
+from zarr.core.buffer.core import default_buffer_prototype
 
 from kerchunk.utils import consolidate, fs_as_store, translate_refs_serializable
 
@@ -349,6 +351,16 @@ def _get_value(self, index, z, var, fn=None):
         logger.debug("Decode: %s -> %s", (selector, index, var, fn), o)
         return o
 
+    async def _read_meta_files(self, m, files):
+        """Helper to load multiple metadata files asynchronously"""
+        res = {}
+        for fn in files:
+            exists = await m.exists(fn)
+            if exists:
+                content = await m.get(fn, prototype=default_buffer_prototype())
+                res[fn] = ujson.dumps(ujson.loads(content.to_bytes()))
+        return res
+
     def first_pass(self):
         """Accumulate the set of concat coords values across all inputs"""
 
@@ -444,10 +456,9 @@ def store_coords(self):
         # TODO: rewrite .zarray/.zattrs with ujson to save space. Maybe make them by hand anyway.
         self.out.update(kv)
         logger.debug("Written coordinates")
-        for fn in [".zgroup", ".zattrs"]:
-            # top-level group attributes from first input
-            if m.fs.exists(fn):
-                self.out[fn] = ujson.dumps(ujson.loads(m.fs.cat(fn)))
+
+        metadata = asyncio.run(self._read_meta_files(m, [".zgroup", ".zattrs"]))
+        self.out.update(metadata)
         logger.debug("Written global metadata")
         self.done.add(2)
 
@@ -494,9 +505,8 @@ def second_pass(self):
             if f"{v}/.zgroup" in fns:
                 # recurse into groups - copy meta, add to dirs to process and don't look
                 # for references in this dir
-                self.out[f"{v}/.zgroup"] = m.fs.cat(f"{v}/.zgroup")
-                if f"{v}/.zattrs" in fns:
-                    self.out[f"{v}/.zattrs"] = m.fs.cat(f"{v}/.zattrs")
+                metadata = asyncio.run(self._read_meta_files(m, [f"{v}/.zgroup", f"{v}/.zattrs"]))
+                self.out.update(metadata)
                 dirs.extend([f for f in fns if not f.startswith(f"{v}/.z")])
                 continue
             if v in self.identical_dims:
@@ -507,8 +517,9 @@ def second_pass(self):
                 self.out[k] = fs.references[k]
                 continue
             logger.debug("Second pass: %s, %s", i, v)
-
-            zarray = ujson.loads(m.fs.cat(f"{v}/.zarray"))
+            
+            zarray = asyncio.run(self._read_meta_files(m, [f"{v}/.zarray"]))[f"{v}/.zarray"]
+            zarray = ujson.loads(zarray)
             if v not in chunk_sizes:
                 chunk_sizes[v] = zarray["chunks"]
             elif chunk_sizes[v] != zarray["chunks"]:
@@ -519,10 +530,8 @@ def second_pass(self):
                     chunks so far: {zarray["chunks"]}"""
                 )
             chunks = chunk_sizes[v]
-            if m.fs.exists(f"{v}/.zattrs"):
-                zattrs = ujson.loads(m.fs.cat(f"{v}/.zattrs"))
-            else:
-                zattrs = ujson.loads({})
+            zattr_meta = asyncio.run(self._read_meta_files(m, [f"{v}/.zattrs"]))
+            zattrs = ujson.loads(zattr_meta.get(f"{v}/.zattrs", "{}"))
             coords = zattrs.get("_ARRAY_DIMENSIONS", [])
             if zarray["shape"] and not coords:
                 coords = list("ikjlm")[: len(zarray["shape"])]

From d8848ce5cb621493258efd468619e9eecfc10f4b Mon Sep 17 00:00:00 2001
From: Matthew Iannucci
Date: Tue, 26 Nov 2024 16:25:52 -0500
Subject: [PATCH 27/40] Forward

---
 kerchunk/fits.py             |  2 +-
 kerchunk/hdf.py              |  2 +-
 kerchunk/hdf4.py             |  1 -
 kerchunk/netCDF3.py          |  2 +-
 kerchunk/tests/test_utils.py | 24 +++++++++++++-----------
 kerchunk/utils.py            | 20 +++++++++----------
 6 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/kerchunk/fits.py b/kerchunk/fits.py
index f0d4fa8e..f4d181ad 100644
--- a/kerchunk/fits.py
+++ b/kerchunk/fits.py
@@ -249,7 +249,7 @@ def add_wcs_coords(hdu, zarr_group=None, dataset=None, dtype="float32"):
         }
         if zarr_group is not None:
             arr = zarr_group.empty(
-                name, shape=shape, chunks=shape, overwrite=True, dtype=dtype
+                name, shape=shape, chunks=shape, 
dtype=dtype ) arr.attrs.update(attrs) arr[:] = world_coord.value.reshape(shape) diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py index 1d4d0054..f72bf8a2 100644 --- a/kerchunk/hdf.py +++ b/kerchunk/hdf.py @@ -107,7 +107,7 @@ def __init__( self.vlen = vlen_encode self.store_dict = out or {} self.store = dict_to_store(self.store_dict) - self._zroot = zarr.group(store=self.store, zarr_format=2, overwrite=True) + self._zroot = zarr.group(store=self.store, zarr_format=2) self._uri = url self.error = error lggr.debug(f"HDF5 file URI: {self._uri}") diff --git a/kerchunk/hdf4.py b/kerchunk/hdf4.py index 8339659b..92b738c7 100644 --- a/kerchunk/hdf4.py +++ b/kerchunk/hdf4.py @@ -155,7 +155,6 @@ def translate(self, filename=None, storage_options=None): dtype=v["dtype"], chunks=v.get("chunks", v["dims"]), compressor=compression, - overwrite=True, ) arr.attrs.update( dict( diff --git a/kerchunk/netCDF3.py b/kerchunk/netCDF3.py index 31438bb0..af410784 100644 --- a/kerchunk/netCDF3.py +++ b/kerchunk/netCDF3.py @@ -169,7 +169,7 @@ def translate(self): out = self.out store = dict_to_store(out) - z = zarr.open(store, mode="w", zarr_format=2, overwrite=True) + z = zarr.open_group(store, mode="w", zarr_format=2) for dim, var in self.variables.items(): if dim in self.chunks: diff --git a/kerchunk/tests/test_utils.py b/kerchunk/tests/test_utils.py index a951c36c..701427e2 100644 --- a/kerchunk/tests/test_utils.py +++ b/kerchunk/tests/test_utils.py @@ -72,21 +72,20 @@ def test_inline_array(): "data/1": b"\x02\x00\x00\x00", "data/.zattrs": '{"foo": "bar"}', } - fs = fsspec.filesystem("reference", fo=refs) out1 = kerchunk.utils.inline_array(refs, threshold=1) # does nothing assert out1 == refs out2 = kerchunk.utils.inline_array(refs, threshold=1, names=["data"]) # explicit - assert "data/1" not in out2 + assert "data/1" not in out2 # TODO: Is this wrong? I dont think zarr deletes existing chunks when overwriting assert json.loads(out2["data/.zattrs"]) == json.loads(refs["data/.zattrs"]) - fs = fsspec.filesystem("reference", fo=out2) - g = zarr.open(fs.get_mapper(), zarr_format=2) - assert g.data[:].tolist() == [1, 2] + store = kerchunk.utils.refs_as_store(out2) + g = zarr.open(store, mode='r', zarr_format=2) + assert g.data[:].tolist() == [1, 2] # What is g.data??? out3 = kerchunk.utils.inline_array(refs, threshold=1000) # inlines because of size assert "data/1" not in out3 - fs = fsspec.filesystem("reference", fo=out3) - g = zarr.open(fs.get_mapper(), zarr_format=2) - assert g.data[:].tolist() == [1, 2] + store = kerchunk.utils.refs_as_store(out3) + g = zarr.open(store, mode='r', zarr_format=2) + assert g.data[:].tolist() == [1, 2] # What is g.data??? 
def test_json(): @@ -113,9 +112,12 @@ def test_subchunk_exact(m, chunks): f"data/{_}.0" for _ in range(nchunk) ] - g2 = zarr.open_group( - "reference://", storage_options={"fo": out, "remote_protocol": "memory"}, zarr_format=2 - ) + store = kerchunk.utils.refs_as_store(out, remote_protocol="memory") + g2 = zarr.open_group(store, mode='r', zarr_format=2) + + # g2 = zarr.open_group( + # "reference://", storage_options={"fo": out, "remote_protocol": "memory"}, zarr_format=2 + # ) assert (g2.data[:] == data).all() diff --git a/kerchunk/utils.py b/kerchunk/utils.py index b918aa1d..9bc7686e 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -20,7 +20,7 @@ def refs_as_fs(refs, remote_protocol=None, remote_options=None, **kwargs): "reference", fo=refs, remote_protocol=remote_protocol, - remote_options=remote_options, + # remote_options=remote_options, **kwargs, asynchronous=True ) @@ -29,9 +29,7 @@ def refs_as_fs(refs, remote_protocol=None, remote_options=None, **kwargs): def refs_as_store(refs, read_only=True, remote_protocol=None, remote_options=None): """Convert a reference set to a zarr store""" - asynchronous = False if is_zarr3(): - asynchronous = True if remote_options is None: remote_options = {"asynchronous": True} else: @@ -40,14 +38,14 @@ def refs_as_store(refs, read_only=True, remote_protocol=None, remote_options=Non fs = refs_as_fs( refs, remote_protocol=remote_protocol, - remote_options=remote_options + remote_options=remote_options, ) - return fs_as_store(fs, read_only=True) + return fs_as_store(fs, read_only=read_only) def is_zarr3(): """Check if the installed zarr version is version 3""" - return Version(zarr.__version__) >= Version("3.0.0.a0") + return Version(zarr.__version__) >= Version("3.0.0.b2") def dict_to_store(store_dict: dict): @@ -71,6 +69,7 @@ def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, read_only=True): zarr.storage.Store or zarr.storage.Mapper, fsspec.AbstractFileSystem """ if is_zarr3(): + print(fs.async_impl is None) if not fs.async_impl: fs = AsyncFileSystemWrapper(fs) fs.asynchronous = True @@ -288,7 +287,7 @@ def do_inline(store, threshold, remote_options=None, remote_protocol=None): def _inline_array(group, threshold, names, prefix=""): - for name, thing in group.items(): + for name, thing in group.members(): if prefix: prefix1 = f"{prefix}.{name}" else: @@ -306,9 +305,8 @@ def _inline_array(group, threshold, names, prefix=""): shape=thing.shape, data=thing[:], chunks=thing.shape, - compression=None, - overwrite=True, fill_value=thing.fill_value, + exists_ok=True, ) arr.attrs.update(original_attrs) @@ -338,8 +336,8 @@ def inline_array(store, threshold=1000, names=None, remote_options=None): amended references set (simple style) """ fs = refs_as_fs(store, remote_options=remote_options or {}) - zarr_store = fs_as_store(fs, mode="r+", remote_options=remote_options or {}) - g = zarr.open_group(zarr_store, mode="r+", zarr_format=2) + zarr_store = fs_as_store(fs, read_only=False) + g = zarr.open_group(zarr_store, zarr_format=2) _inline_array(g, threshold, names=names or []) return fs.references From 1fa294e145962ea6472bc53bdcbd69fedd66a69b Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 16:29:25 -0500 Subject: [PATCH 28/40] More --- kerchunk/fits.py | 2 +- kerchunk/hdf4.py | 1 + kerchunk/netCDF3.py | 1 + kerchunk/utils.py | 3 +-- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/kerchunk/fits.py b/kerchunk/fits.py index f4d181ad..2e84120f 100644 --- a/kerchunk/fits.py +++ b/kerchunk/fits.py @@ -249,7 +249,7 @@ def 
add_wcs_coords(hdu, zarr_group=None, dataset=None, dtype="float32"): } if zarr_group is not None: arr = zarr_group.empty( - name, shape=shape, chunks=shape, dtype=dtype + name, shape=shape, chunks=shape, dtype=dtype, exists_ok=True ) arr.attrs.update(attrs) arr[:] = world_coord.value.reshape(shape) diff --git a/kerchunk/hdf4.py b/kerchunk/hdf4.py index 92b738c7..16b08740 100644 --- a/kerchunk/hdf4.py +++ b/kerchunk/hdf4.py @@ -155,6 +155,7 @@ def translate(self, filename=None, storage_options=None): dtype=v["dtype"], chunks=v.get("chunks", v["dims"]), compressor=compression, + exists_ok=True, ) arr.attrs.update( dict( diff --git a/kerchunk/netCDF3.py b/kerchunk/netCDF3.py index af410784..457aafbb 100644 --- a/kerchunk/netCDF3.py +++ b/kerchunk/netCDF3.py @@ -255,6 +255,7 @@ def translate(self): fill_value=fill, chunks=(1,) + dtype.shape, compressor=None, + exists_ok=True, ) arr.attrs.update( { diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 9bc7686e..bb9cd4cb 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -27,7 +27,7 @@ def refs_as_fs(refs, remote_protocol=None, remote_options=None, **kwargs): return fs -def refs_as_store(refs, read_only=True, remote_protocol=None, remote_options=None): +def refs_as_store(refs, read_only=False, remote_protocol=None, remote_options=None): """Convert a reference set to a zarr store""" if is_zarr3(): if remote_options is None: @@ -69,7 +69,6 @@ def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, read_only=True): zarr.storage.Store or zarr.storage.Mapper, fsspec.AbstractFileSystem """ if is_zarr3(): - print(fs.async_impl is None) if not fs.async_impl: fs = AsyncFileSystemWrapper(fs) fs.asynchronous = True From 543178d33eb62a73ac8f4ad184dee7d3fb941b9f Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 16:39:36 -0500 Subject: [PATCH 29/40] Figure out async wrapper --- kerchunk/tests/test_hdf.py | 7 +++++-- kerchunk/utils.py | 12 +++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index f600a127..122cced2 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -13,6 +13,7 @@ import xarray as xr import zarr +from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper from kerchunk.hdf import SingleHdf5ToZarr, has_visititems_links from kerchunk.combine import MultiZarrToZarr, drop from kerchunk.utils import refs_as_fs, refs_as_store @@ -164,7 +165,8 @@ def test_times(times_data): h5chunks = SingleHdf5ToZarr(f, url) test_dict = h5chunks.translate() - store = refs_as_store(test_dict, remote_protocol="file") + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(test_dict, fs=localfs) result = xr.open_dataset( store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) ) @@ -179,7 +181,8 @@ def test_times_str(times_data): h5chunks = SingleHdf5ToZarr(url) test_dict = h5chunks.translate() - store = refs_as_store(test_dict) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(test_dict, fs=localfs) result = xr.open_dataset( store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False) ) diff --git a/kerchunk/utils.py b/kerchunk/utils.py index bb9cd4cb..667a8b74 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -14,20 +14,21 @@ import zarr -def refs_as_fs(refs, remote_protocol=None, remote_options=None, **kwargs): +def refs_as_fs(refs, fs=None, remote_protocol=None, remote_options=None, **kwargs): """Convert a reference set 
to an fsspec filesystem""" fs = fsspec.filesystem( "reference", fo=refs, + fs=fs, remote_protocol=remote_protocol, - # remote_options=remote_options, + remote_options=remote_options, **kwargs, asynchronous=True ) return fs -def refs_as_store(refs, read_only=False, remote_protocol=None, remote_options=None): +def refs_as_store(refs, read_only=False, fs=None, remote_protocol=None, remote_options=None): """Convert a reference set to a zarr store""" if is_zarr3(): if remote_options is None: @@ -35,12 +36,13 @@ def refs_as_store(refs, read_only=False, remote_protocol=None, remote_options=No else: remote_options["asynchronous"] = True - fs = refs_as_fs( + fss = refs_as_fs( refs, + fs=fs, remote_protocol=remote_protocol, remote_options=remote_options, ) - return fs_as_store(fs, read_only=read_only) + return fs_as_store(fss, read_only=read_only) def is_zarr3(): From 96b56cd39e564817a7b31d988c4a9ad37f8ea615 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 16:55:06 -0500 Subject: [PATCH 30/40] Closer on hdf5 --- kerchunk/fits.py | 4 +- kerchunk/hdf.py | 13 +++---- kerchunk/tests/test_hdf.py | 76 ++++++++++++++++++++++---------------- kerchunk/utils.py | 14 +++---- 4 files changed, 59 insertions(+), 48 deletions(-) diff --git a/kerchunk/fits.py b/kerchunk/fits.py index 2e84120f..70f48d8a 100644 --- a/kerchunk/fits.py +++ b/kerchunk/fits.py @@ -151,7 +151,7 @@ def process_file( for name in dtype.names if hdu.columns[name].format.startswith(("P", "Q")) } - kwargs["object_codec"] = VarArrCodec( + kwargs["compressor"] = VarArrCodec( str(dtype), str(dt2), nrows, types ) dtype = dt2 @@ -165,7 +165,7 @@ def process_file( # TODO: we could sub-chunk on biggest dimension name = hdu.name or str(ext) arr = g.empty( - name=name, dtype=dtype, shape=shape, chunks=shape, compressor=None, zarr_format=2, **kwargs + name=name, dtype=dtype, shape=shape, chunks=shape, zarr_format=2, **kwargs ) arr.attrs.update( { diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py index f72bf8a2..56ae958a 100644 --- a/kerchunk/hdf.py +++ b/kerchunk/hdf.py @@ -325,11 +325,11 @@ def _translator( for v in val ] kwargs["data"] = out - kwargs["object_codec"] = numcodecs.JSON() + kwargs["compressor"] = numcodecs.JSON() fill = None elif self.vlen == "null": dt = "O" - kwargs["object_codec"] = FillStringsCodec(dtype="S16") + kwargs["compressor"] = FillStringsCodec(dtype="S16") fill = " " elif self.vlen == "leave": dt = "S16" @@ -344,7 +344,7 @@ def _translator( index.decode(): label.decode() for index, label in zip(indexes, labels) } - kwargs["object_codec"] = FillStringsCodec( + kwargs["compressor"] = FillStringsCodec( dtype="S16", id_map=mapping ) fill = " " @@ -384,7 +384,7 @@ def _translator( ) } ) - kwargs["object_codec"] = FillStringsCodec( + kwargs["compressor"] = FillStringsCodec( dtype=str(dt), id_map=mapping ) dt = [ @@ -410,7 +410,7 @@ def _translator( ) for v in h5obj.dtype.names ] - kwargs["object_codec"] = FillStringsCodec(dtype=str(dt)) + kwargs["compressor"] = FillStringsCodec(dtype=str(dt)) dt = [ ( v, @@ -451,7 +451,7 @@ def _translator( ) dt = "O" kwargs["data"] = data2 - kwargs["object_codec"] = numcodecs.JSON() + kwargs["compressor"] = numcodecs.JSON() fill = None else: raise NotImplementedError @@ -473,7 +473,6 @@ def _translator( dtype=dt or h5obj.dtype, chunks=h5obj.chunks or False, fill_value=fill, - compressor=None, filters=filters, attributes={ "_ARRAY_DIMENSIONS": adims, diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index 122cced2..ecfffa1a 100644 --- 
a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -198,23 +198,26 @@ def test_string_embed(): fn = osp.join(here, "vlen.h5") h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="embed") out = h.translate() - fs = refs_as_fs(out) - assert txt in fs.references["vlen_str/0"] + + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + fs = refs_as_fs(out, fs=localfs) + #assert txt in fs.references["vlen_str/0"] store = fs_as_store(fs) z = zarr.open(store, zarr_format=2) - assert z.vlen_str.dtype == "O" - assert z.vlen_str[0] == txt - assert (z.vlen_str[1:] == "").all() + assert z["vlen_str"].dtype == "O" + assert z["vlen_str"][0] == txt + assert (z["vlen_str"][1:] == "").all() def test_string_null(): fn = osp.join(here, "vlen.h5") h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="null", inline_threshold=0) out = h.translate() - store = refs_as_store(out) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs=localfs) z = zarr.open(store, zarr_format=2) - assert z.vlen_str.dtype == "O" - assert (z.vlen_str[:] == None).all() + assert z["vlen_str"].dtype == "O" + assert (z["vlen_str"][:] == None).all() def test_string_leave(): @@ -224,11 +227,13 @@ def test_string_leave(): f, fn, vlen_encode="leave", inline_threshold=0 ) out = h.translate() - store = refs_as_store(out) + + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs=localfs) z = zarr.open(store, zarr_format=2) - assert z.vlen_str.dtype == "S16" - assert z.vlen_str[0] # some obscured ID - assert (z.vlen_str[1:] == b"").all() + assert z["vlen_str"].dtype == "S16" + assert z["vlen_str"][0] # some obscured ID + assert (z["vlen_str"][1:] == b"").all() def test_string_decode(): @@ -238,12 +243,13 @@ def test_string_decode(): f, fn, vlen_encode="encode", inline_threshold=0 ) out = h.translate() - fs = refs_as_fs(out) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + fs = refs_as_fs(out, fs=localfs) assert txt in fs.cat("vlen_str/.zarray").decode() # stored in filter def store = fs_as_store(fs) z = zarr.open(store, zarr_format=2) - assert z.vlen_str[0] == txt - assert (z.vlen_str[1:] == "").all() + assert z["vlen_str"][0] == txt + assert (z["vlen_str"][1:] == "").all() def test_compound_string_null(): @@ -251,11 +257,12 @@ def test_compound_string_null(): with open(fn, "rb") as f: h = kerchunk.hdf.SingleHdf5ToZarr(f, fn, vlen_encode="null", inline_threshold=0) out = h.translate() - store = refs_as_store(out) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs=localfs) z = zarr.open(store, zarr_format=2) - assert z.vlen_str[0].tolist() == (10, None) - assert (z.vlen_str["ints"][1:] == 0).all() - assert (z.vlen_str["strs"][1:] == None).all() + assert z["vlen_str"][0].tolist() == (10, None) + assert (z["vlen_str"]["ints"][1:] == 0).all() + assert (z["vlen_str"]["strs"][1:] == None).all() def test_compound_string_leave(): @@ -265,12 +272,13 @@ def test_compound_string_leave(): f, fn, vlen_encode="leave", inline_threshold=0 ) out = h.translate() - store = refs_as_store(out) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs=localfs) z = zarr.open(store, zarr_format=2) - assert z.vlen_str["ints"][0] == 10 - assert z.vlen_str["strs"][0] # random ID - assert (z.vlen_str["ints"][1:] == 0).all() - assert (z.vlen_str["strs"][1:] == b"").all() + assert z["vlen_str"]["ints"][0] == 10 + assert z["vlen_str"]["strs"][0] # random ID + assert 
(z["vlen_str"]["ints"][1:] == 0).all() + assert (z["vlen_str"]["strs"][1:] == b"").all() def test_compound_string_encode(): @@ -280,12 +288,13 @@ def test_compound_string_encode(): f, fn, vlen_encode="encode", inline_threshold=0 ) out = h.translate() - store = refs_as_store(out) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs=localfs) z = zarr.open(store, zarr_format=2) - assert z.vlen_str["ints"][0] == 10 - assert z.vlen_str["strs"][0] == "water" - assert (z.vlen_str["ints"][1:] == 0).all() - assert (z.vlen_str["strs"][1:] == "").all() + assert z["vlen_str"]["ints"][0] == 10 + assert z["vlen_str"]["strs"][0] == "water" + assert (z["vlen_str"]["ints"][1:] == 0).all() + assert (z["vlen_str"]["strs"][1:] == "").all() # def test_compact(): @@ -311,7 +320,8 @@ def test_compress(): h.translate() continue out = h.translate() - store = refs_as_store(out) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs=localfs) g = zarr.open(store, zarr_format=2) assert np.mean(g.data) == 49.5 @@ -321,7 +331,8 @@ def test_embed(): h = kerchunk.hdf.SingleHdf5ToZarr(fn, vlen_encode="embed") out = h.translate() - store = refs_as_store(out) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs=localfs) z = zarr.open(store, zarr_format=2) data = z["Domain_10"]["STER"]["min_1"]["boom_1"]["temperature"][:] assert data[0].tolist() == [ @@ -356,7 +367,8 @@ def test_translate_links(): out = kerchunk.hdf.SingleHdf5ToZarr(fn, inline_threshold=50).translate( preserve_linked_dsets=True ) - store = refs_as_store(out) + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs=localfs) z = zarr.open(store, zarr_format=2) # 1. 
Test the hard linked datasets were translated correctly diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 667a8b74..773d5dd1 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -58,7 +58,7 @@ def dict_to_store(store_dict: dict): return zarr.storage.KVStore(store_dict) -def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, read_only=True): +def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, read_only=False): """Open the refs as a zarr store Parameters @@ -204,14 +204,14 @@ def _encode_for_JSON(store): return store -def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: +def encode_fill_value(v: Any, dtype: np.dtype, compressor: Any = None) -> Any: # early out if v is None: return v if dtype.kind == "V" and dtype.hasobject: - if object_codec is None: - raise ValueError("missing object_codec for object array") - v = object_codec.encode(v) + if compressor is None: + raise ValueError("missing compressor for object array") + v = compressor.encode(v) v = str(base64.standard_b64encode(v), "ascii") return v if dtype.kind == "f": @@ -230,8 +230,8 @@ def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: elif dtype.kind in "c": c = cast(np.complex128, np.dtype(complex).type()) v = ( - encode_fill_value(v.real, c.real.dtype, object_codec), - encode_fill_value(v.imag, c.imag.dtype, object_codec), + encode_fill_value(v.real, c.real.dtype, compressor), + encode_fill_value(v.imag, c.imag.dtype, compressor), ) return v elif dtype.kind in "SV": From 0808b05b64eb7d378f226d55297298c7fa2540c6 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 16:59:20 -0500 Subject: [PATCH 31/40] netcdf but failing --- kerchunk/tests/test_netcdf.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kerchunk/tests/test_netcdf.py b/kerchunk/tests/test_netcdf.py index 755823da..e6bfd066 100644 --- a/kerchunk/tests/test_netcdf.py +++ b/kerchunk/tests/test_netcdf.py @@ -1,12 +1,12 @@ import os - import fsspec import numpy as np from packaging.version import Version import pytest from kerchunk import netCDF3 +from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper from kerchunk.utils import refs_as_store xr = pytest.importorskip("xarray") @@ -31,7 +31,7 @@ def test_one(m): h = netCDF3.netcdf_recording_file("memory://data.nc3") out = h.translate() - store = refs_as_store(out, remote_protocol="memory") + store = refs_as_store(out) ds = xr.open_dataset( store, @@ -86,13 +86,14 @@ def test_unlimited(unlimited_dataset): expected = xr.open_dataset(fn, engine="scipy") h = netCDF3.NetCDF3ToZarr(fn) out = h.translate() - ds = xr.open_dataset( - "reference://", - engine="zarr", - backend_kwargs={ - "consolidated": False, - "storage_options": {"fo": out}, - }, + + fs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out, fs) + + ds = xr.open_zarr( + store, + zarr_format=2, + consolidated=False, ) assert ds.attrs["title"] == "testing" assert ds.temp.attrs["units"] == "K" From aef006e342e56aa03e771a79d6262cb9b999b105 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 17:06:34 -0500 Subject: [PATCH 32/40] grib passing --- kerchunk/tests/test_grib.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index 9bc90b71..5925abc6 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -9,6 +9,7 @@ #import datatree import zarr import ujson +from 
fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper from kerchunk.grib2 import ( scan_grib, _split_file, @@ -32,10 +33,13 @@ def test_one(): # from https://dd.weather.gc.ca/model_gem_regional/10km/grib2/00/000 fn = os.path.join(here, "CMC_reg_DEPR_ISBL_10_ps10km_2022072000_P000.grib2") out = scan_grib(fn) - ds = xr.open_dataset( - "reference://", - engine="zarr", - backend_kwargs={"consolidated": False, "storage_options": {"fo": out[0]}}, + + fs = AsyncFileSystemWrapper(fsspec.filesystem("file")) + store = refs_as_store(out[0], fs=fs) + ds = xr.open_zarr( + store, + zarr_format=2, + consolidated=False ) assert ds.attrs["GRIB_centre"] == "cwao" From d9bf0dd1f10463ee26b2558ef1fba6764d5609c4 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 17:17:52 -0500 Subject: [PATCH 33/40] Fix inline test --- kerchunk/tests/test_utils.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kerchunk/tests/test_utils.py b/kerchunk/tests/test_utils.py index 701427e2..5b556794 100644 --- a/kerchunk/tests/test_utils.py +++ b/kerchunk/tests/test_utils.py @@ -8,6 +8,8 @@ import pytest import zarr +from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper + def test_rename(): old = {"version": 1, "refs": {"v0": ["oldpath", 0, 0], "bin": "data"}} @@ -75,17 +77,17 @@ def test_inline_array(): out1 = kerchunk.utils.inline_array(refs, threshold=1) # does nothing assert out1 == refs out2 = kerchunk.utils.inline_array(refs, threshold=1, names=["data"]) # explicit - assert "data/1" not in out2 # TODO: Is this wrong? I dont think zarr deletes existing chunks when overwriting assert json.loads(out2["data/.zattrs"]) == json.loads(refs["data/.zattrs"]) - store = kerchunk.utils.refs_as_store(out2) + + localfs = fsspec.filesystem("file") + store = kerchunk.utils.refs_as_store(out2, fs=localfs) g = zarr.open(store, mode='r', zarr_format=2) - assert g.data[:].tolist() == [1, 2] # What is g.data??? + assert g["data"][:].tolist() == [1, 2] # What is g.data??? out3 = kerchunk.utils.inline_array(refs, threshold=1000) # inlines because of size - assert "data/1" not in out3 - store = kerchunk.utils.refs_as_store(out3) + store = kerchunk.utils.refs_as_store(out3, localfs) g = zarr.open(store, mode='r', zarr_format=2) - assert g.data[:].tolist() == [1, 2] # What is g.data??? + assert g["data"][:].tolist() == [1, 2] # What is g.data??? 
def test_json(): From 884fc685ecff296cf8f677334a8990860fb0d9ae Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 17:27:03 -0500 Subject: [PATCH 34/40] More --- kerchunk/tests/test_zarr.py | 1 + kerchunk/xarray_backend.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/kerchunk/tests/test_zarr.py b/kerchunk/tests/test_zarr.py index 94af8939..3c02fc69 100644 --- a/kerchunk/tests/test_zarr.py +++ b/kerchunk/tests/test_zarr.py @@ -54,6 +54,7 @@ def test_zarr_in_zip(zarr_in_zip, ds): out = kerchunk.zarr.ZarrToZarr( url="zip://", storage_options={"fo": zarr_in_zip} ).translate() + ds2 = xr.open_dataset( out, engine="kerchunk", diff --git a/kerchunk/xarray_backend.py b/kerchunk/xarray_backend.py index dfbbafba..0620614b 100644 --- a/kerchunk/xarray_backend.py +++ b/kerchunk/xarray_backend.py @@ -3,6 +3,8 @@ import os import fsspec +from kerchunk.utils import refs_as_store + class KerchunkBackend(BackendEntrypoint): def open_dataset( @@ -41,8 +43,8 @@ def open_reference_dataset( if open_dataset_options is None: open_dataset_options = {} - m = fsspec.get_mapper("reference://", fo=filename_or_obj, **storage_options) + store = refs_as_store(filename_or_obj, remote_options=storage_options) - return xr.open_dataset( - m, engine="zarr", zarr_format=2, consolidated=False, **open_dataset_options + return xr.open_zarr( + store, zarr_format=2, consolidated=False, **open_dataset_options ) From 1145f454afd3ad663bdc0d55ae5003fc65ee5ae8 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 21:47:45 -0500 Subject: [PATCH 35/40] standardize compressor name --- kerchunk/combine.py | 4 ++-- kerchunk/hdf4.py | 4 ++-- kerchunk/tests/test_df.py | 2 +- kerchunk/tests/test_utils.py | 4 ++-- kerchunk/tests/test_zarr.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kerchunk/combine.py b/kerchunk/combine.py index 841b9e8a..376a8003 100644 --- a/kerchunk/combine.py +++ b/kerchunk/combine.py @@ -409,7 +409,7 @@ def store_coords(self): # The names of the variables to write in the second pass, not a coordinate continue # parametrize the threshold value below? 
- compression = numcodecs.Zstd() if len(v) > 100 else None + compressor = numcodecs.Zstd() if len(v) > 100 else None kw = {} if self.cf_units and k in self.cf_units: if "M" not in self.coo_dtypes.get(k, ""): @@ -439,7 +439,7 @@ def store_coords(self): data=data, shape=data.shape, exists_ok=True, - compressor=compression, + compressor=compressor, dtype=self.coo_dtypes.get(k, data.dtype), **kw, ) diff --git a/kerchunk/hdf4.py b/kerchunk/hdf4.py index 16b08740..030c33a0 100644 --- a/kerchunk/hdf4.py +++ b/kerchunk/hdf4.py @@ -148,13 +148,13 @@ def translate(self, filename=None, storage_options=None): refs = {} for k, v in output.items(): if isinstance(v, dict): - compression = ZlibCodec() if "refs" in v else None + compressor = ZlibCodec() if "refs" in v else None arr = g.create_dataset( name=k, shape=v["dims"], dtype=v["dtype"], chunks=v.get("chunks", v["dims"]), - compressor=compression, + compressor=compressor, exists_ok=True, ) arr.attrs.update( diff --git a/kerchunk/tests/test_df.py b/kerchunk/tests/test_df.py index 0d0fafb1..45bcb9bc 100644 --- a/kerchunk/tests/test_df.py +++ b/kerchunk/tests/test_df.py @@ -18,7 +18,7 @@ def test_1(m, url): "a/4": ["memory://url4.file"], "a/5": ["memory://url5.file"], "a/6": b"data", - "a/.zarray": b"""{"shape": [7], "chunks":[1], "filters": [], "compression": null}""", + "a/.zarray": b"""{"shape": [7], "chunks":[1], "filters": [], "compressor": null}""", ".zgroup": b'{"zarr_format": 2}', } u = "memory://myrefs.json" diff --git a/kerchunk/tests/test_utils.py b/kerchunk/tests/test_utils.py index 5b556794..a29e3b4f 100644 --- a/kerchunk/tests/test_utils.py +++ b/kerchunk/tests/test_utils.py @@ -102,7 +102,7 @@ def test_subchunk_exact(m, chunks): store = m.get_mapper("test.zarr") g = zarr.open_group(store, mode="w", zarr_format=2) data = np.arange(100).reshape(10, 10) - arr = g.create_dataset("data", data=data, chunks=chunks, compression=None) + arr = g.create_dataset("data", data=data, chunks=chunks, compressor=None) ref = kerchunk.zarr.single_zarr("memory://test.zarr")["refs"] extra = [] if chunks[0] == 10 else ["data/1.0"] @@ -162,7 +162,7 @@ def test_deflate_zip_archive(m): data = b"piece of data" with fsspec.open("memory://archive", "wb") as f: - arc = zipfile.ZipFile(file=f, mode="w", compression=zipfile.ZIP_DEFLATED) + arc = zipfile.ZipFile(file=f, mode="w", compressor=zipfile.ZIP_DEFLATED) arc.writestr("data1", data) arc.close() refs = { diff --git a/kerchunk/tests/test_zarr.py b/kerchunk/tests/test_zarr.py index 3c02fc69..b78baaaa 100644 --- a/kerchunk/tests/test_zarr.py +++ b/kerchunk/tests/test_zarr.py @@ -37,7 +37,7 @@ def _zip(file): filename = file + os.path.extsep + "zip" with zipfile.ZipFile( - filename, "w", compression=zipfile.ZIP_STORED, allowZip64=True + filename, "w", compressor=zipfile.ZIP_STORED, allowZip64=True ) as fh: for root, _, filenames in os.walk(file): for each_filename in filenames: From 94ec47938c8eed8319ddfc80c6cc36189579b973 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 26 Nov 2024 21:53:08 -0500 Subject: [PATCH 36/40] Fix one more hdf test --- kerchunk/tests/test_hdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py index ecfffa1a..68961394 100644 --- a/kerchunk/tests/test_hdf.py +++ b/kerchunk/tests/test_hdf.py @@ -323,7 +323,7 @@ def test_compress(): localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) store = refs_as_store(out, fs=localfs) g = zarr.open(store, zarr_format=2) - assert np.mean(g.data) == 49.5 + assert 
np.mean(g["data"]) == 49.5 def test_embed(): From a9693d1b5be8c5752b63221beef7831ae0b5584b Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 27 Nov 2024 10:39:29 -0500 Subject: [PATCH 37/40] Small tweaks --- kerchunk/tests/test_netcdf.py | 2 ++ kerchunk/tests/test_utils.py | 2 +- kerchunk/tests/test_zarr.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kerchunk/tests/test_netcdf.py b/kerchunk/tests/test_netcdf.py index e6bfd066..b7143398 100644 --- a/kerchunk/tests/test_netcdf.py +++ b/kerchunk/tests/test_netcdf.py @@ -31,6 +31,8 @@ def test_one(m): h = netCDF3.netcdf_recording_file("memory://data.nc3") out = h.translate() + print(out) + store = refs_as_store(out) ds = xr.open_dataset( diff --git a/kerchunk/tests/test_utils.py b/kerchunk/tests/test_utils.py index a29e3b4f..5cbfb150 100644 --- a/kerchunk/tests/test_utils.py +++ b/kerchunk/tests/test_utils.py @@ -162,7 +162,7 @@ def test_deflate_zip_archive(m): data = b"piece of data" with fsspec.open("memory://archive", "wb") as f: - arc = zipfile.ZipFile(file=f, mode="w", compressor=zipfile.ZIP_DEFLATED) + arc = zipfile.ZipFile(file=f, mode="w", compression=zipfile.ZIP_DEFLATED) arc.writestr("data1", data) arc.close() refs = { diff --git a/kerchunk/tests/test_zarr.py b/kerchunk/tests/test_zarr.py index b78baaaa..3c02fc69 100644 --- a/kerchunk/tests/test_zarr.py +++ b/kerchunk/tests/test_zarr.py @@ -37,7 +37,7 @@ def _zip(file): filename = file + os.path.extsep + "zip" with zipfile.ZipFile( - filename, "w", compressor=zipfile.ZIP_STORED, allowZip64=True + filename, "w", compression=zipfile.ZIP_STORED, allowZip64=True ) as fh: for root, _, filenames in os.walk(file): for each_filename in filenames: From 7e9112ad7418fee0acde01a4fb5f2c91fc805121 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 27 Nov 2024 10:55:54 -0500 Subject: [PATCH 38/40] Hide fsspec import where necessary --- kerchunk/utils.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kerchunk/utils.py b/kerchunk/utils.py index 773d5dd1..b8a53e3c 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -9,7 +9,6 @@ import ujson import fsspec -from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper import numpy as np import zarr @@ -23,12 +22,14 @@ def refs_as_fs(refs, fs=None, remote_protocol=None, remote_options=None, **kwarg remote_protocol=remote_protocol, remote_options=remote_options, **kwargs, - asynchronous=True + asynchronous=True, ) return fs -def refs_as_store(refs, read_only=False, fs=None, remote_protocol=None, remote_options=None): +def refs_as_store( + refs, read_only=False, fs=None, remote_protocol=None, remote_options=None +): """Convert a reference set to a zarr store""" if is_zarr3(): if remote_options is None: @@ -40,7 +41,7 @@ def refs_as_store(refs, read_only=False, fs=None, remote_protocol=None, remote_o refs, fs=fs, remote_protocol=remote_protocol, - remote_options=remote_options, + remote_options=remote_options, ) return fs_as_store(fss, read_only=read_only) @@ -72,7 +73,14 @@ def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, read_only=False): """ if is_zarr3(): if not fs.async_impl: - fs = AsyncFileSystemWrapper(fs) + try: + from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper + + fs = AsyncFileSystemWrapper(fs) + except ImportError: + raise ImportError( + "Only fsspec>2024.10.0 supports the async filesystem wrapper required for working with reference filesystems. 
" + ) fs.asynchronous = True return zarr.storage.RemoteStore(fs, read_only=read_only) else: From a7af691c2aea422783907362be834913648fe61d Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 16 Jan 2025 09:52:24 -0500 Subject: [PATCH 39/40] Update with many fixes - but stioll not complete --- kerchunk/__init__.py | 2 +- kerchunk/codecs.py | 7 ++-- kerchunk/combine.py | 20 +++++---- kerchunk/fits.py | 9 ++-- kerchunk/hdf.py | 75 ++++++++++++++++++++++------------ kerchunk/netCDF3.py | 12 ++++-- kerchunk/tests/test_combine.py | 26 +++++++----- kerchunk/tests/test_hdf.py | 14 +++---- kerchunk/tests/test_tiff.py | 4 +- kerchunk/tests/test_utils.py | 28 +++++++------ kerchunk/tests/test_zarr.py | 4 +- kerchunk/utils.py | 68 +++++++++++++++--------------- kerchunk/xarray_backend.py | 2 +- kerchunk/zarr.py | 17 +++++++- pytest.ini | 2 + 15 files changed, 173 insertions(+), 117 deletions(-) create mode 100644 pytest.ini diff --git a/kerchunk/__init__.py b/kerchunk/__init__.py index 21b4e540..85863c32 100644 --- a/kerchunk/__init__.py +++ b/kerchunk/__init__.py @@ -1,4 +1,4 @@ -from . import codecs +from kerchunk import codecs from importlib.metadata import version as _version diff --git a/kerchunk/codecs.py b/kerchunk/codecs.py index 46b19072..c0680da8 100644 --- a/kerchunk/codecs.py +++ b/kerchunk/codecs.py @@ -134,7 +134,7 @@ def __init__(self, *, var: str, dtype: np.dtype) -> None: object.__setattr__(self, "dtype", dtype) @classmethod - def from_dict(cls, data: dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> "GRIBZarrCodec": _, configuration_parsed = parse_named_configuration( data, "bytes", require_configuration=True ) @@ -149,7 +149,7 @@ def to_dict(self) -> dict[str, JSON]: "name": "grib", "configuration": {"var": self.var, "dtype": self.dtype}, } - + async def _decode_single( self, chunk_bytes: Buffer, @@ -322,7 +322,8 @@ def encode(self, buf): class ZlibCodec(Codec): codec_id = "zlib" - def __init__(self): ... + def __init__(self): + ... 
def decode(self, data, out=None): if out: diff --git a/kerchunk/combine.py b/kerchunk/combine.py index 376a8003..ca3e488d 100644 --- a/kerchunk/combine.py +++ b/kerchunk/combine.py @@ -201,7 +201,7 @@ def append( remote_protocol=remote_protocol, remote_options=remote_options, target_options=target_options, - asynchronous=True + asynchronous=True, ) ds = xr.open_dataset( fs.get_mapper(), engine="zarr", backend_kwargs={"consolidated": False} @@ -267,7 +267,9 @@ def fss(self): self._paths = [] for of in fsspec.open_files(self.path, **self.target_options): self._paths.append(of.full_name) - fs = fsspec.core.url_to_fs(self.path[0], asynchronous=True, **self.target_options)[0] + fs = fsspec.core.url_to_fs( + self.path[0], asynchronous=True, **self.target_options + )[0] try: # JSON path fo_list = fs.cat(self.path) @@ -436,13 +438,13 @@ def store_coords(self): kw["fill_value"] = z[k].fill_value arr = group.create_array( name=k, - data=data, shape=data.shape, - exists_ok=True, + overwrite=True, compressor=compressor, dtype=self.coo_dtypes.get(k, data.dtype), **kw, ) + arr[:] = data if k in z: # copy attributes if values came from an original variable arr.attrs.update(z[k].attrs) @@ -505,7 +507,9 @@ def second_pass(self): if f"{v}/.zgroup" in fns: # recurse into groups - copy meta, add to dirs to process and don't look # for references in this dir - metadata = asyncio.run(self._read_meta_files(m, [f"{v}/.zgroup", f"{v}/.zattrs"])) + metadata = asyncio.run( + self._read_meta_files(m, [f"{v}/.zgroup", f"{v}/.zattrs"]) + ) self.out.update(metadata) dirs.extend([f for f in fns if not f.startswith(f"{v}/.z")]) continue @@ -517,8 +521,10 @@ def second_pass(self): self.out[k] = fs.references[k] continue logger.debug("Second pass: %s, %s", i, v) - - zarray = asyncio.run(self._read_meta_files(m, [f"{v}/.zarray"]))[f"{v}/.zarray"] + + zarray = asyncio.run(self._read_meta_files(m, [f"{v}/.zarray"]))[ + f"{v}/.zarray" + ] zarray = ujson.loads(zarray) if v not in chunk_sizes: chunk_sizes[v] = zarray["chunks"] diff --git a/kerchunk/fits.py b/kerchunk/fits.py index 70f48d8a..7afadd6d 100644 --- a/kerchunk/fits.py +++ b/kerchunk/fits.py @@ -8,7 +8,7 @@ from fsspec.implementations.reference import LazyReferenceMapper -from kerchunk.utils import class_factory, dict_to_store +from kerchunk.utils import class_factory, dict_to_store, translate_refs_serializable from kerchunk.codecs import AsciiTableCodec, VarArrCodec try: @@ -94,7 +94,7 @@ def process_file( hdu.header.__str__() # causes fixing of invalid cards attrs = dict(hdu.header) - kwargs = {} + kwargs = {"compressor": None} if hdu.is_image: # for images/cubes (i.e., ndarrays with simple type) nax = hdu.header["NAXIS"] @@ -164,8 +164,8 @@ def process_file( # one chunk for whole thing. 
# TODO: we could sub-chunk on biggest dimension name = hdu.name or str(ext) - arr = g.empty( - name=name, dtype=dtype, shape=shape, chunks=shape, zarr_format=2, **kwargs + arr = g.create_array( + name=name, dtype=dtype, shape=shape, chunks=shape, **kwargs ) arr.attrs.update( { @@ -191,6 +191,7 @@ def process_file( ) if isinstance(out, LazyReferenceMapper): out.flush() + out = translate_refs_serializable(out) return out diff --git a/kerchunk/hdf.py b/kerchunk/hdf.py index 56ae958a..e0d58951 100644 --- a/kerchunk/hdf.py +++ b/kerchunk/hdf.py @@ -10,7 +10,12 @@ import numcodecs from .codecs import FillStringsCodec -from .utils import _encode_for_JSON, encode_fill_value, dict_to_store, translate_refs_serializable +from .utils import ( + _encode_for_JSON, + encode_fill_value, + dict_to_store, + translate_refs_serializable, +) try: import h5py @@ -32,6 +37,7 @@ "_nc3_strict", "_NCProperties", } +fsspec.utils.setup_logging(lggr) class SingleHdf5ToZarr: @@ -173,6 +179,7 @@ def _transfer_attrs( An equivalent Zarr group or array to the HDF5 group or dataset with attributes. """ + upd = {} for n, v in h5obj.attrs.items(): if n in _HIDDEN_ATTRS: continue @@ -196,11 +203,19 @@ def _transfer_attrs( if v == "DIMENSION_SCALE": continue try: - zobj.attrs[n] = v + if isinstance(v, (str, int, float)): + upd[n] = v + elif isinstance(v, (tuple, set, list)) and all( + isinstance(_, (str, int, float)) for _ in v + ): + upd[n] = list(v) + else: + upd[n] = str(v) except TypeError: lggr.debug( f"TypeError transferring attr, skipping:\n {n}@{h5obj.name} = {v} ({type(v)})" ) + zobj.attrs.update(upd) def _decode_filters(self, h5obj: Union[h5py.Dataset, h5py.Group]): if h5obj.scaleoffset: @@ -272,7 +287,7 @@ def _translator( ): """Produce Zarr metadata for all groups and datasets in the HDF5 file.""" try: # method must not raise exception - kwargs = {} + kwargs = {"compressor": None} if isinstance(h5obj, (h5py.SoftLink, h5py.HardLink)): h5obj = self._h5f[name] @@ -289,9 +304,9 @@ def _translator( if h5obj.id.get_create_plist().get_layout() == h5py.h5d.COMPACT: # Only do if h5obj.nbytes < self.inline?? kwargs["data"] = h5obj[:] - filters = [] + kwargs["filters"] = [] else: - filters = self._decode_filters(h5obj) + kwargs["filters"] = self._decode_filters(h5obj) dt = None # Get storage info of this HDF5 dataset... 
cinfo = self._storage_info(h5obj) @@ -325,11 +340,11 @@ def _translator( for v in val ] kwargs["data"] = out - kwargs["compressor"] = numcodecs.JSON() + kwargs["filters"] = [numcodecs.JSON()] fill = None elif self.vlen == "null": dt = "O" - kwargs["compressor"] = FillStringsCodec(dtype="S16") + kwargs["filters"] = [FillStringsCodec(dtype="S16")] fill = " " elif self.vlen == "leave": dt = "S16" @@ -344,9 +359,9 @@ def _translator( index.decode(): label.decode() for index, label in zip(indexes, labels) } - kwargs["compressor"] = FillStringsCodec( - dtype="S16", id_map=mapping - ) + kwargs["filters"] = [ + FillStringsCodec(dtype="S16", id_map=mapping) + ] fill = " " else: raise NotImplementedError @@ -384,9 +399,9 @@ def _translator( ) } ) - kwargs["compressor"] = FillStringsCodec( - dtype=str(dt), id_map=mapping - ) + kwargs["filters"] = [ + FillStringsCodec(dtype=str(dt), id_map=mapping) + ] dt = [ ( v, @@ -410,7 +425,7 @@ def _translator( ) for v in h5obj.dtype.names ] - kwargs["compressor"] = FillStringsCodec(dtype=str(dt)) + kwargs["filters"] = [FillStringsCodec(dtype=str(dt))] dt = [ ( v, @@ -451,7 +466,7 @@ def _translator( ) dt = "O" kwargs["data"] = data2 - kwargs["compressor"] = numcodecs.JSON() + kwargs["filters"] = [numcodecs.JSON()] fill = None else: raise NotImplementedError @@ -460,20 +475,18 @@ def _translator( return if h5obj.attrs.get("_FillValue") is not None: fill = h5obj.attrs.get("_FillValue") - fill = encode_fill_value( - fill, dt or h5obj.dtype - ) + fill = encode_fill_value(fill, dt or h5obj.dtype) adims = self._get_array_dims(h5obj) - # Create a Zarr array equivalent to this HDF5 dataset.. - za = self._zroot.require_array( + # Create a Zarr array equivalent to this HDF5 dataset. + data = kwargs.pop("data", None) + za = self._zroot.create_array( name=h5obj.name, shape=h5obj.shape, dtype=dt or h5obj.dtype, - chunks=h5obj.chunks or False, + chunks=h5obj.chunks or h5obj.shape, fill_value=fill, - filters=filters, attributes={ "_ARRAY_DIMENSIONS": adims, }, @@ -483,9 +496,14 @@ def _translator( self._transfer_attrs(h5obj, za) lggr.debug(f"_ARRAY_DIMENSIONS = {adims}") - - if "data" in kwargs: - return # embedded bytes, no chunks to copy + if data is not None: + try: + za[:] = data + except (ValueError, TypeError): + self.store_dict[f"{za.path}/0"] = kwargs["filters"][0].encode( + data + ) + return # Store chunk location metadata... 
if cinfo: @@ -493,7 +511,11 @@ def _translator( if h5obj.fletcher32: logging.info("Discarding fletcher32 checksum") v["size"] -= 4 - key = str.removeprefix(h5obj.name, "/") + "/" + ".".join(map(str, k)) + key = ( + str.removeprefix(h5obj.name, "/") + + "/" + + ".".join(map(str, k)) + ) if ( self.inline @@ -681,4 +703,3 @@ def _is_netcdf_variable(dataset: h5py.Dataset): def has_visititems_links(): return hasattr(h5py.Group, "visititems_links") - diff --git a/kerchunk/netCDF3.py b/kerchunk/netCDF3.py index 457aafbb..d5356876 100644 --- a/kerchunk/netCDF3.py +++ b/kerchunk/netCDF3.py @@ -6,7 +6,13 @@ from fsspec.implementations.reference import LazyReferenceMapper import fsspec -from kerchunk.utils import _encode_for_JSON, dict_to_store, inline_array, translate_refs_serializable +import kerchunk.utils +from kerchunk.utils import ( + _encode_for_JSON, + dict_to_store, + inline_array, + translate_refs_serializable, +) try: from scipy.io._netcdf import ZERO, NC_VARIABLE, netcdf_file, netcdf_variable @@ -255,7 +261,7 @@ def translate(self): fill_value=fill, chunks=(1,) + dtype.shape, compressor=None, - exists_ok=True, + overwrite=True, ) arr.attrs.update( { @@ -288,13 +294,13 @@ def translate(self): if k != "filename" # special "attribute" } ) + out = kerchunk.utils.translate_refs_serializable(out) if self.threshold: out = inline_array( out, self.threshold, remote_options=dict(remote_options=self.storage_options), ) - if isinstance(out, LazyReferenceMapper): out.flush() return out diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py index 0cfb9505..054291a4 100644 --- a/kerchunk/tests/test_combine.py +++ b/kerchunk/tests/test_combine.py @@ -4,7 +4,7 @@ import dask.array as da import pytest import xarray as xr -import zarr +import zarr.storage import kerchunk.combine from kerchunk.zarr import single_zarr @@ -132,20 +132,23 @@ xr.Dataset({"data": data}).to_zarr("memory://quad_2chunk2.zarr") # simple time arrays - xarray can't make these! 
-m = fs.get_mapper("time1.zarr")
-z = zarr.open(m, mode="w", zarr_format=2)
+z = zarr.open("memory://time1.zarr", mode="w", zarr_format=2)
 time1_array = np.array([1], dtype="M8[s]")
-ar = z.create_array("time", data=time1_array, shape=time1_array.shape)
+ar = z.create_array("time", shape=time1_array.shape, dtype=time1_array.dtype)
+ar[:] = time1_array
 ar.attrs.update({"_ARRAY_DIMENSIONS": ["time"]})
-ar = z.create_array("data", data=arr, shape=arr.shape)
+ar = z.create_array("data", dtype=arr.dtype, shape=arr.shape)
+ar[:] = arr
 ar.attrs.update({"_ARRAY_DIMENSIONS": ["time", "x", "y"]})
 
-m = fs.get_mapper("time2.zarr")
-z = zarr.open(m, mode="w", zarr_format=2)
+
+z = zarr.open("memory://time2.zarr", mode="w", zarr_format=2)
 time2_array = np.array([2], dtype="M8[s]")
-ar = z.create_array("time", data=time2_array, shape=time2_array.shape)
+ar = z.create_array("time", dtype=time2_array.dtype, shape=time2_array.shape)
+ar[:] = time2_array
 ar.attrs.update({"_ARRAY_DIMENSIONS": ["time"]})
-ar = z.create_array("data", data=arr, shape=arr.shape)
+ar = z.create_array("data", dtype=arr.dtype, shape=arr.shape)
+ar[:] = arr
 ar.attrs.update({"_ARRAY_DIMENSIONS": ["time", "x", "y"]})
 
 
@@ -228,8 +231,9 @@ def refs():
 def test_fixture(refs):
     # effectively checks that single_zarr works
     assert "single1" in refs
-    m = fsspec.get_mapper("reference://", fo=refs["single1"], remote_protocol="memory")
-    g = xr.open_dataset(m, engine="zarr", backend_kwargs={"consolidated": False})
+    fs = fsspec.filesystem("reference", fo=refs["single1"], remote_protocol="memory")
+    store = zarr.storage.FsspecStore(fs)
+    g = xr.open_dataset(store, engine="zarr", backend_kwargs={"consolidated": False})
     assert g.time.values.tolist() == [1]
     assert (g.data.values == arr).all()
     assert g.attrs["attr1"] == 5
diff --git a/kerchunk/tests/test_hdf.py b/kerchunk/tests/test_hdf.py
index 68961394..c8d6c678 100644
--- a/kerchunk/tests/test_hdf.py
+++ b/kerchunk/tests/test_hdf.py
@@ -51,7 +51,7 @@ def test_single_direct_open():
         h5f=url, inline_threshold=300, storage_options=so
     ).translate()
 
-    store = refs_as_store(test_dict)
+    store = refs_as_store(test_dict, remote_options=dict(asynchronous=True, anon=True))
 
     ds_direct = xr.open_dataset(
         store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)
@@ -61,7 +61,7 @@ def test_single_direct_open():
         h5chunks = SingleHdf5ToZarr(f, url, storage_options=so)
         test_dict = h5chunks.translate()
 
-    store = refs_as_store(test_dict)
+    store = refs_as_store(test_dict, remote_options=dict(asynchronous=True, anon=True))
 
     ds_from_file_opener = xr.open_dataset(
         store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)
@@ -88,7 +88,7 @@ def test_multizarr(generate_mzz):
     mzz = generate_mzz
     test_dict = mzz.translate()
 
-    store = refs_as_store(test_dict)
+    store = refs_as_store(test_dict, remote_options=dict(asynchronous=True, anon=True))
     ds = xr.open_dataset(
         store, engine="zarr", zarr_format=2, backend_kwargs=dict(consolidated=False)
     )
@@ -196,12 +196,12 @@ def test_times_str(times_data):
 def test_string_embed():
     fn = osp.join(here, "vlen.h5")
-    h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="embed")
+    h = kerchunk.hdf.SingleHdf5ToZarr(fn, fn, vlen_encode="embed", error="pdb")
     out = h.translate()
 
     localfs = AsyncFileSystemWrapper(fsspec.filesystem("file"))
     fs = refs_as_fs(out, fs=localfs)
-    #assert txt in fs.references["vlen_str/0"]
+    # assert txt in fs.references["vlen_str/0"]
     store = fs_as_store(fs)
     z = zarr.open(store, zarr_format=2)
     assert z["vlen_str"].dtype == "O"
@@ -227,7 
+227,7 @@ def test_string_leave(): f, fn, vlen_encode="leave", inline_threshold=0 ) out = h.translate() - + localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) store = refs_as_store(out, fs=localfs) z = zarr.open(store, zarr_format=2) @@ -328,7 +328,7 @@ def test_compress(): def test_embed(): fn = osp.join(here, "NEONDSTowerTemperatureData.hdf5") - h = kerchunk.hdf.SingleHdf5ToZarr(fn, vlen_encode="embed") + h = kerchunk.hdf.SingleHdf5ToZarr(fn, vlen_encode="embed", error="pdb") out = h.translate() localfs = AsyncFileSystemWrapper(fsspec.filesystem("file")) diff --git a/kerchunk/tests/test_tiff.py b/kerchunk/tests/test_tiff.py index b81e7bab..3e4ea1c7 100644 --- a/kerchunk/tests/test_tiff.py +++ b/kerchunk/tests/test_tiff.py @@ -36,8 +36,8 @@ def test_coord(): fn = files[0] out = kerchunk.tiff.tiff_to_zarr(fn) store = refs_as_store(out) - z = zarr.open(out, zarr_format=2) # highest res is the one xarray picks - out = kerchunk.tiff.generate_coords(z.attrs, z[0].shape) + z = zarr.open(store, zarr_format=2) # highest res is the one xarray picks + out = kerchunk.tiff.generate_coords(z.attrs, z["0"].shape) ds = xr.open_dataset(fn) assert (ds.x == out["x"]).all() diff --git a/kerchunk/tests/test_utils.py b/kerchunk/tests/test_utils.py index 5cbfb150..f6c7e5ef 100644 --- a/kerchunk/tests/test_utils.py +++ b/kerchunk/tests/test_utils.py @@ -81,13 +81,13 @@ def test_inline_array(): localfs = fsspec.filesystem("file") store = kerchunk.utils.refs_as_store(out2, fs=localfs) - g = zarr.open(store, mode='r', zarr_format=2) - assert g["data"][:].tolist() == [1, 2] # What is g.data??? + g = zarr.open(store, mode="r", zarr_format=2) + assert g["data"][:].tolist() == [1, 2] # What is g.data??? out3 = kerchunk.utils.inline_array(refs, threshold=1000) # inlines because of size store = kerchunk.utils.refs_as_store(out3, localfs) - g = zarr.open(store, mode='r', zarr_format=2) - assert g["data"][:].tolist() == [1, 2] # What is g.data??? + g = zarr.open(store, mode="r", zarr_format=2) + assert g["data"][:].tolist() == [1, 2] # What is g.data??? 
def test_json(): @@ -99,28 +99,30 @@ def test_json(): @pytest.mark.parametrize("chunks", [[10, 10], [5, 10]]) def test_subchunk_exact(m, chunks): - store = m.get_mapper("test.zarr") - g = zarr.open_group(store, mode="w", zarr_format=2) + g = zarr.open_group("memory://test.zarr", mode="w", zarr_format=2) data = np.arange(100).reshape(10, 10) - arr = g.create_dataset("data", data=data, chunks=chunks, compressor=None) + arr = g.create_array( + "data", dtype=data.dtype, shape=data.shape, chunks=chunks, compressor=None + ) + arr[:] = data ref = kerchunk.zarr.single_zarr("memory://test.zarr")["refs"] extra = [] if chunks[0] == 10 else ["data/1.0"] - assert list(ref) == [".zgroup", "data/.zarray", "data/0.0"] + extra + ref2 = list(_ for _ in ref if not _.endswith("zattrs")) # ignore empty attrs + assert ref2 == [".zgroup", "data/.zarray", "data/0.0"] + extra out = kerchunk.utils.subchunk(ref, "data", 5) nchunk = 10 // chunks[0] * 5 - assert list(out) == [".zgroup", "data/.zarray"] + [ - f"data/{_}.0" for _ in range(nchunk) - ] + out2 = list(_ for _ in out if not _.endswith("zattrs")) + assert out2 == [".zgroup", "data/.zarray"] + [f"data/{_}.0" for _ in range(nchunk)] store = kerchunk.utils.refs_as_store(out, remote_protocol="memory") - g2 = zarr.open_group(store, mode='r', zarr_format=2) + g2 = zarr.open_group(store, mode="r", zarr_format=2) # g2 = zarr.open_group( # "reference://", storage_options={"fo": out, "remote_protocol": "memory"}, zarr_format=2 # ) - assert (g2.data[:] == data).all() + assert (g2["data"][:] == data).all() @pytest.mark.parametrize("archive", ["zip", "tar"]) diff --git a/kerchunk/tests/test_zarr.py b/kerchunk/tests/test_zarr.py index 3c02fc69..27063541 100644 --- a/kerchunk/tests/test_zarr.py +++ b/kerchunk/tests/test_zarr.py @@ -46,7 +46,7 @@ def _zip(file): return filename fn = f"{tmpdir}/test.zarr" - ds.to_zarr(fn, mode="w") + ds.to_zarr(fn, mode="w", zarr_format=2) return _zip(fn) @@ -90,7 +90,7 @@ def test_zarr_combine(tmpdir, ds): def test_zarr_json_dump_succeeds(tmpdir, ds): fn1 = f"{tmpdir}/test1.zarr" - ds.to_zarr(fn1) + ds.to_zarr(fn1, zarr_format=2) one = kerchunk.zarr.ZarrToZarr( fn1, diff --git a/kerchunk/utils.py b/kerchunk/utils.py index b8a53e3c..9bdce3f8 100644 --- a/kerchunk/utils.py +++ b/kerchunk/utils.py @@ -8,12 +8,19 @@ import ujson -import fsspec +import fsspec.implementations.asyn_wrapper import numpy as np -import zarr +import zarr.storage -def refs_as_fs(refs, fs=None, remote_protocol=None, remote_options=None, **kwargs): +def refs_as_fs( + refs, + fs=None, + remote_protocol=None, + remote_options=None, + asynchronous=True, + **kwargs, +): """Convert a reference set to an fsspec filesystem""" fs = fsspec.filesystem( "reference", @@ -22,7 +29,7 @@ def refs_as_fs(refs, fs=None, remote_protocol=None, remote_options=None, **kwarg remote_protocol=remote_protocol, remote_options=remote_options, **kwargs, - asynchronous=True, + asynchronous=asynchronous, ) return fs @@ -31,11 +38,8 @@ def refs_as_store( refs, read_only=False, fs=None, remote_protocol=None, remote_options=None ): """Convert a reference set to a zarr store""" - if is_zarr3(): - if remote_options is None: - remote_options = {"asynchronous": True} - else: - remote_options["asynchronous"] = True + remote_options = remote_options or {} + remote_options["asynchronous"] = True fss = refs_as_fs( refs, @@ -65,26 +69,23 @@ def fs_as_store(fs: fsspec.asyn.AsyncFileSystem, read_only=False): Parameters ---------- fs: fsspec.async.AsyncFileSystem - mode: str + read_only: bool Returns ------- 
zarr.storage.Store or zarr.storage.Mapper, fsspec.AbstractFileSystem """ - if is_zarr3(): - if not fs.async_impl: - try: - from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper + if not fs.async_impl: + try: + from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper - fs = AsyncFileSystemWrapper(fs) - except ImportError: - raise ImportError( - "Only fsspec>2024.10.0 supports the async filesystem wrapper required for working with reference filesystems. " - ) - fs.asynchronous = True - return zarr.storage.RemoteStore(fs, read_only=read_only) - else: - return fs.get_mapper() + fs = AsyncFileSystemWrapper(fs) + except ImportError: + raise ImportError( + "Only fsspec>2024.10.0 supports the async filesystem wrapper required for working with reference filesystems. " + ) + fs.asynchronous = True + return zarr.storage.FsspecStore(fs, read_only=read_only) def class_factory(func): @@ -259,14 +260,11 @@ def do_inline(store, threshold, remote_options=None, remote_protocol=None): The chunk may need encoding with base64 if not ascii, so actual length may be larger than threshold. """ - fs = fsspec.filesystem( - "reference", - fo=store, - remote_options=remote_options, - remote_protocol=remote_protocol, - ) fs = refs_as_fs( - store, remote_protocol=remote_protocol, remote_options=remote_options + store, + remote_protocol=remote_protocol, + remote_options=remote_options, + asynchronous=False, ) out = fs.references.copy() @@ -308,15 +306,15 @@ def _inline_array(group, threshold, names, prefix=""): cond2 = prefix1 in names if cond1 or cond2: original_attrs = dict(thing.attrs) - arr = group.create_dataset( + arr = group.create_array( name=name, dtype=thing.dtype, shape=thing.shape, - data=thing[:], chunks=thing.shape, fill_value=thing.fill_value, - exists_ok=True, + overwrite=True, ) + arr[:] = thing[:] arr.attrs.update(original_attrs) @@ -369,7 +367,7 @@ def subchunk(store, variable, factor): ------- modified store """ - fs = refs_as_fs(store) + fs = fsspec.filesystem("reference", fo=store) store = fs.references meta_file = f"{variable}/.zarray" meta = ujson.loads(fs.cat(meta_file)) @@ -419,7 +417,7 @@ def subchunk(store, variable, factor): else: (url,) = v offset = 0 - size = fs.size(k) + size = fs.info(k)["size"] for subpart in range(factor): new_index = ( chunk_index[:ind] diff --git a/kerchunk/xarray_backend.py b/kerchunk/xarray_backend.py index 0620614b..79976d57 100644 --- a/kerchunk/xarray_backend.py +++ b/kerchunk/xarray_backend.py @@ -43,7 +43,7 @@ def open_reference_dataset( if open_dataset_options is None: open_dataset_options = {} - store = refs_as_store(filename_or_obj, remote_options=storage_options) + store = refs_as_store(filename_or_obj, **storage_options) return xr.open_zarr( store, zarr_format=2, consolidated=False, **open_dataset_options diff --git a/kerchunk/zarr.py b/kerchunk/zarr.py index ea0612de..083e0f48 100644 --- a/kerchunk/zarr.py +++ b/kerchunk/zarr.py @@ -2,6 +2,7 @@ from fsspec.implementations.reference import LazyReferenceMapper import kerchunk.utils +import ujson def single_zarr( @@ -35,11 +36,20 @@ def single_zarr( """ if isinstance(uri_or_store, str): mapper = fsspec.get_mapper(uri_or_store, **(storage_options or {})) + protocol = mapper.fs.unstrip_protocol("").rstrip("://") else: mapper = uri_or_store if isinstance(mapper, fsspec.FSMap) and storage_options is None: storage_options = mapper.fs.storage_options + protocol = mapper.fs.unstrip_protocol("").rstrip("://") + else: + protocol = None + try: + check = ujson.loads(mapper[".zgroup"]) + 
assert check["zarr_format"] == 2 + except (KeyError, ValueError, TypeError) as e: + raise ValueError("Failed to load dataset as V2 zarr") from e refs = out or {} for k in mapper: if k.startswith("."): @@ -50,7 +60,12 @@ def single_zarr( inline_threshold = inline or inline_threshold if inline_threshold: - refs = do_inline(refs, inline_threshold, remote_options=storage_options) + refs = do_inline( + refs, + inline_threshold, + remote_options=storage_options, + remote_protocol=protocol, + ) if isinstance(refs, LazyReferenceMapper): refs.flush() refs = kerchunk.utils.consolidate(refs) diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..e83bb177 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_default_fixture_loop_scope=session From 95f340fa874c0c3ed6ccf8f9f98bc2f7a692c1cc Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 16 Jan 2025 10:24:28 -0500 Subject: [PATCH 40/40] min python --- .github/workflows/tests.yml | 2 +- ci/environment-py310.yml | 36 ----------------- pyproject.toml | 2 +- tests/test_grib.py | 80 ++++++++++++++++++------------------- 4 files changed, 40 insertions(+), 80 deletions(-) delete mode 100644 ci/environment-py310.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 90d8bb9d..0a31f183 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [310, 311, 312] + python-version: [311, 312] steps: - uses: actions/checkout@v4 diff --git a/ci/environment-py310.yml b/ci/environment-py310.yml deleted file mode 100644 index 970acd42..00000000 --- a/ci/environment-py310.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: test_env -channels: - - conda-forge - - nodefaults -dependencies: - - python=3.10 - - dask - - zarr - - xarray>=2024.10.0 - - h5netcdf - - h5py - - pandas - - cfgrib - - cftime - # Temporary workaround for #508 - - eccodes <2.38 - - - astropy - - requests - - aiohttp - - pytest-cov - - fsspec - - dask - - scipy - - s3fs - - python-blosc - - flake8 - - black - - fastparquet - - pip - - pyopenssl - - tifffile - - netCDF4 - - pip: - - git+https://github.com/fsspec/filesystem_spec - - ipfsspec diff --git a/pyproject.toml b/pyproject.toml index 4d3f9832..ca28f8ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "kerchunk" description = "Functions to make reference descriptions for ReferenceFileSystem" readme = "README.md" -requires-python = ">=3.7" +requires-python = ">=3.11" dynamic = ["version"] license = {text = "MIT"} authors = [ diff --git a/tests/test_grib.py b/tests/test_grib.py index 5edb42d9..2c5387fd 100644 --- a/tests/test_grib.py +++ b/tests/test_grib.py @@ -6,7 +6,6 @@ import pandas as pd import pytest import xarray as xr -#import datatree import zarr import ujson from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper @@ -36,11 +35,7 @@ def test_one(): fs = AsyncFileSystemWrapper(fsspec.filesystem("file")) store = refs_as_store(out[0], fs=fs) - ds = xr.open_zarr( - store, - zarr_format=2, - consolidated=False - ) + ds = xr.open_zarr(store, zarr_format=2, consolidated=False) assert ds.attrs["GRIB_centre"] == "cwao" ds2 = xr.open_dataset(fn, engine="cfgrib", backend_kwargs={"indexpath": ""}) @@ -76,11 +71,7 @@ def test_archives(tmpdir, url): store = refs_as_store(out, remote_options={"anon": True}) - ours = xr.open_zarr( - store, - zarr_format=2, - consolidated=False - ) + ours = xr.open_zarr(store, zarr_format=2, consolidated=False) data = _fetch_first(url) 
fn = os.path.join(tmpdir, "grib.grib2") @@ -131,7 +122,7 @@ def test_grib_tree(): "atmosphere latitude longitude step time valid_time".split() ) # Assert that the fill value is set correctly - assert np.isnan(zg['refc/instant/atmosphere/step'].fill_value) + assert np.isnan(zg["refc/instant/atmosphere/step"].fill_value) # The following two tests use json fixture data generated from calling scan grib @@ -149,14 +140,18 @@ def test_correct_hrrr_subhf_group_step(): scanned_msgs = ujson.load(fobj) original_zg = [ - zarr.open_group(fs_as_store(fsspec.filesystem("reference", fo=val)), mode="r", zarr_format=2) + zarr.open_group( + fs_as_store(fsspec.filesystem("reference", fo=val)), mode="r", zarr_format=2 + ) for val in scanned_msgs ] corrected_msgs = [correct_hrrr_subhf_step(msg) for msg in scanned_msgs] corrected_zg = [ - zarr.open_group(fs_as_store(fsspec.filesystem("reference", fo=val)), mode="r", zarr_format=2) + zarr.open_group( + fs_as_store(fsspec.filesystem("reference", fo=val)), mode="r", zarr_format=2 + ) for val in corrected_msgs ] @@ -183,28 +178,29 @@ def test_hrrr_subhf_corrected_grib_tree(): zstore = fs_as_store(z_fs) zg = zarr.open_group(zstore, mode="r", zarr_format=2) # Check the values and shape of the time coordinates - assert zg['u/instant/heightAboveGround/step'][:].tolist() == [ + assert zg["u/instant/heightAboveGround/step"][:].tolist() == [ 0.0, 0.25, 0.5, 0.75, 1.0, ] - assert zg['u/instant/heightAboveGround/step'].shape == (5,) - assert zg['u/instant/heightAboveGround/valid_time'][:].tolist() == [ + assert zg["u/instant/heightAboveGround/step"].shape == (5,) + assert zg["u/instant/heightAboveGround/valid_time"][:].tolist() == [ [1695862800, 1695863700, 1695864600, 1695865500, 1695866400] ] - assert zg['u/instant/heightAboveGround/valid_time'].shape == (1, 5) - assert zg['u/instant/heightAboveGround/time'][:].tolist() == [1695862800] - assert zg['u/instant/heightAboveGround/time'].shape == (1,) - assert zg['dswrf/avg/surface/step'][:].tolist() == [0.0, 0.25, 0.5, 0.75, 1.0] - assert zg['dswrf/avg/surface/step'].shape == (5,) - assert zg['dswrf/avg/surface/valid_time'][:].tolist() == [ + assert zg["u/instant/heightAboveGround/valid_time"].shape == (1, 5) + assert zg["u/instant/heightAboveGround/time"][:].tolist() == [1695862800] + assert zg["u/instant/heightAboveGround/time"].shape == (1,) + assert zg["dswrf/avg/surface/step"][:].tolist() == [0.0, 0.25, 0.5, 0.75, 1.0] + assert zg["dswrf/avg/surface/step"].shape == (5,) + assert zg["dswrf/avg/surface/valid_time"][:].tolist() == [ [1695862800, 1695863700, 1695864600, 1695865500, 1695866400] ] - assert zg['dswrf/avg/surface/valid_time'].shape == (1, 5) - assert zg['dswrf/avg/surface/time'][:].tolist() == [1695862800] - assert zg['dswrf/avg/surface/time'].shape == (1,) + assert zg["dswrf/avg/surface/valid_time"].shape == (1, 5) + assert zg["dswrf/avg/surface/time"][:].tolist() == [1695862800] + assert zg["dswrf/avg/surface/time"].shape == (1,) + # The following two test use json fixture data generated from calling scan grib # scan_grib("testdata/hrrr.t01z.wrfsfcf00.grib2") @@ -221,19 +217,19 @@ def test_hrrr_sfcf_grib_tree(): store = fs_as_store(fsspec.filesystem("reference", fo=merged)) zg = zarr.open_group(store, mode="r", zarr_format=2) # Check the heightAboveGround level shape of the time coordinates - assert zg['u/instant/heightAboveGround/heightAboveGround'][()] == 80.0 - assert zg['u/instant/heightAboveGround/heightAboveGround'].shape == () - assert zg['u/instant/heightAboveGround/step'][:].tolist() == [0.0, 1.0] - 
assert zg['u/instant/heightAboveGround/step'].shape == (2,) - assert zg['u/instant/heightAboveGround/valid_time'][:].tolist() == [ + assert zg["u/instant/heightAboveGround/heightAboveGround"][()] == 80.0 + assert zg["u/instant/heightAboveGround/heightAboveGround"].shape == () + assert zg["u/instant/heightAboveGround/step"][:].tolist() == [0.0, 1.0] + assert zg["u/instant/heightAboveGround/step"].shape == (2,) + assert zg["u/instant/heightAboveGround/valid_time"][:].tolist() == [ [1695862800, 1695866400] ] - assert zg['u/instant/heightAboveGround/valid_time'].shape == (1, 2) - assert zg['u/instant/heightAboveGround/time'][:].tolist() == [1695862800] - assert zg['u/instant/heightAboveGround/time'].shape == (1,) + assert zg["u/instant/heightAboveGround/valid_time"].shape == (1, 2) + assert zg["u/instant/heightAboveGround/time"][:].tolist() == [1695862800] + assert zg["u/instant/heightAboveGround/time"].shape == (1,) # Check the isobaricInhPa level shape and time coordinates - assert zg['u/instant/isobaricInhPa/isobaricInhPa'][:].tolist() == [ + assert zg["u/instant/isobaricInhPa/isobaricInhPa"][:].tolist() == [ 250.0, 300.0, 500.0, @@ -242,9 +238,9 @@ def test_hrrr_sfcf_grib_tree(): 925.0, 1000.0, ] - assert zg['u/instant/isobaricInhPa/isobaricInhPa'].shape == (7,) - assert zg['u/instant/isobaricInhPa/step'][:].tolist() == [0.0, 1.0] - assert zg['u/instant/isobaricInhPa/step'].shape == (2,) + assert zg["u/instant/isobaricInhPa/isobaricInhPa"].shape == (7,) + assert zg["u/instant/isobaricInhPa/step"][:].tolist() == [0.0, 1.0] + assert zg["u/instant/isobaricInhPa/step"].shape == (2,) # Valid time values get exploded by isobaricInhPa aggregation # Is this a feature or a bug? @@ -254,11 +250,11 @@ def test_hrrr_sfcf_grib_tree(): [1695866400 for _ in range(7)], ] ] - assert zg['u/instant/isobaricInhPa/valid_time'][:].tolist() == expected_valid_times - assert zg['u/instant/isobaricInhPa/valid_time'].shape == (1, 2, 7) + assert zg["u/instant/isobaricInhPa/valid_time"][:].tolist() == expected_valid_times + assert zg["u/instant/isobaricInhPa/valid_time"].shape == (1, 2, 7) - assert zg['u/instant/isobaricInhPa/time'][:].tolist() == [1695862800] - assert zg['u/instant/isobaricInhPa/time'].shape == (1,) + assert zg["u/instant/isobaricInhPa/time"][:].tolist() == [1695862800] + assert zg["u/instant/isobaricInhPa/time"].shape == (1,) # def test_hrrr_sfcf_grib_datatree():
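
Editor's note: the recurring flow this series converges on is scan -> reference
set -> zarr store -> xarray. A minimal end-to-end sketch of that flow, assuming
this branch of kerchunk together with zarr-python 3 and fsspec>2024.10.0 (which
provides AsyncFileSystemWrapper); the file name "example.h5" is hypothetical:

    import fsspec
    import xarray as xr
    from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper

    import kerchunk.hdf
    from kerchunk.utils import refs_as_store

    # Scan a local HDF5 file into a dict of zarr v2 metadata plus byte ranges.
    refs = kerchunk.hdf.SingleHdf5ToZarr("example.h5", inline_threshold=300).translate()

    # Local filesystems are synchronous, so wrap one before building the store,
    # as the tests in this series do.
    localfs = AsyncFileSystemWrapper(fsspec.filesystem("file"))
    store = refs_as_store(refs, fs=localfs)

    # The references describe zarr v2 data, so the reader must be told so.
    ds = xr.open_zarr(store, zarr_format=2, consolidated=False)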
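
Editor's note: the other mechanical change patch 39 applies throughout is the
zarr-python 3 array-creation migration: create_dataset(data=...) becomes
create_array(...) followed by an explicit write, and exists_ok=True becomes
overwrite=True. A standalone sketch of the pattern under the same assumptions
(the store URL "memory://example.zarr" is arbitrary, and compressor=None simply
mirrors the calls in the diffs above):

    import numpy as np
    import zarr

    z = zarr.open_group("memory://example.zarr", mode="w", zarr_format=2)
    data = np.arange(100).reshape(10, 10)

    # Create the array first (zarr 3 accepts no data= keyword) ...
    arr = z.create_array(
        "data", shape=data.shape, dtype=data.dtype, chunks=(5, 10), compressor=None
    )
    # ... then write values and attributes in separate steps.
    arr[:] = data
    arr.attrs.update({"_ARRAY_DIMENSIONS": ["x", "y"]})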