ENH: Support using opened netCDF4.Dataset (Fixes #1459) #1508

Merged: 1 commit, Aug 31, 2017
doc/whats-new.rst (+6, -0)

@@ -55,6 +55,12 @@ Enhancements
   (:issue:`576`).
   By `Stephan Hoyer <https://github.com/shoyer>`_.
 
+- Support using an existing, opened netCDF4 ``Dataset`` with
+  :py:class:`~xarray.backends.NetCDF4DataStore`. This permits creating an
+  :py:class:`~xarray.Dataset` from a netCDF4 ``Dataset`` that has been
+  opened using other means (:issue:`1459`).
+  By `Ryan May <https://github.com/dopplershift>`_.
+
 Bug fixes
 ~~~~~~~~~

Review thread on this entry:

Member:
    What do you think about moving the current constructor logic into a
    classmethod, NetCDF4DataStore.open(filename, mode, format, group, writer,
    clobber, diskless, persist, autoclose), and adjusting the constructor to
    NetCDF4DataStore.__init__(self, netcdf4_dataset, opener=None, writer=None,
    autoclose=None)?

    Right now, I don't think anyone is using the NetCDF4DataStore constructor
    directly -- there's literally no good reason for that. This also gives us
    a pattern we could use for other constructors (e.g., pydap) where passing
    in an existing object is desirable.

Contributor Author:
    I'm happy to refactor like that -- I just didn't think it was on the table.
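For context, a minimal sketch of the usage this entry describes, mirroring the test added later in this PR; the file name is a placeholder:

```python
import netCDF4
import xarray as xr
from xarray.backends import NetCDF4DataStore

# Open the file by other means (here plain netCDF4-python), then wrap the
# live handle so xarray can decode it without reopening the file itself.
nc = netCDF4.Dataset('example.nc', mode='r')  # placeholder path
store = NetCDF4DataStore(nc)
ds = xr.open_dataset(store)
```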
xarray/backends/api.py (+6, -5)

@@ -278,8 +278,9 @@ def maybe_decode_store(store, lock=False):
             engine = _get_default_engine(filename_or_obj,
                                          allow_remote=True)
         if engine == 'netcdf4':
-            store = backends.NetCDF4DataStore(filename_or_obj, group=group,
-                                              autoclose=autoclose)
+            store = backends.NetCDF4DataStore.open(filename_or_obj,
+                                                   group=group,
+                                                   autoclose=autoclose)
         elif engine == 'scipy':
             store = backends.ScipyDataStore(filename_or_obj,
                                             autoclose=autoclose)
@@ -518,7 +519,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT,
     return combined
 
 
-WRITEABLE_STORES = {'netcdf4': backends.NetCDF4DataStore,
+WRITEABLE_STORES = {'netcdf4': backends.NetCDF4DataStore.open,
                     'scipy': backends.ScipyDataStore,
                     'h5netcdf': backends.H5NetCDFStore}
@@ -553,7 +554,7 @@ def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None,
     _validate_attrs(dataset)
 
     try:
-        store_cls = WRITEABLE_STORES[engine]
+        store_open = WRITEABLE_STORES[engine]
     except KeyError:
         raise ValueError('unrecognized engine for to_netcdf: %r' % engine)

@@ -564,7 +565,7 @@
     sync = writer is None
 
     target = path_or_file if path_or_file is not None else BytesIO()
-    store = store_cls(target, mode, format, group, writer)
+    store = store_open(target, mode, format, group, writer)
 
     if unlimited_dims is None:
         unlimited_dims = dataset.encoding.get('unlimited_dims', None)
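The dictionary swap above needs no other changes because a class and a classmethod are both factory callables with the same call signature. A toy illustration of the idea (not from the PR):

```python
class Store:
    """Stand-in for a data store class; not the real xarray class."""
    def __init__(self, target):
        self.target = target

    @classmethod
    def open(cls, target):
        return cls(target)

# to_netcdf only needs *some* callable that returns a ready store, so a
# class (like ScipyDataStore) and a classmethod (like NetCDF4DataStore.open)
# are interchangeable as values in WRITEABLE_STORES.
factories = {'ctor': Store, 'classmethod': Store.open}
assert all(f('out.nc').target == 'out.nc' for f in factories.values())
```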
xarray/backends/netCDF4_.py (+33, -12)

@@ -187,35 +187,56 @@ def _open_netcdf4_group(filename, mode, group=None, **kwargs):
     with close_on_error(ds):
         ds = _nc4_group(ds, group, mode)
 
+    _disable_mask_and_scale(ds)
+
+    return ds
+
+
+def _disable_mask_and_scale(ds):
     for var in ds.variables.values():
         # we handle masking and scaling ourselves
         var.set_auto_maskandscale(False)
-    return ds
 
 
 class NetCDF4DataStore(WritableCFDataStore, DataStorePickleMixin):
     """Store for reading and writing data via the Python-NetCDF4 library.
 
     This store supports NetCDF3, NetCDF4 and OpenDAP datasets.
     """
-    def __init__(self, filename, mode='r', format='NETCDF4', group=None,
-                 writer=None, clobber=True, diskless=False, persist=False,
+    def __init__(self, netcdf4_dataset, mode='r', writer=None, opener=None,
                  autoclose=False):
+
+        if autoclose and opener is None:
+            raise ValueError('autoclose requires an opener')
+
+        _disable_mask_and_scale(netcdf4_dataset)
+
+        self.ds = netcdf4_dataset
+        self._autoclose = autoclose
+        self._isopen = True
+        self.format = self.ds.data_model
+        self._filename = self.ds.filepath()
+        self.is_remote = is_remote_uri(self._filename)
+        self._mode = mode = 'a' if mode == 'w' else mode
+        if opener:
+            self._opener = functools.partial(opener, mode=self._mode)
+        else:
+            self._opener = opener
+        super(NetCDF4DataStore, self).__init__(writer)
+
+    @classmethod
+    def open(cls, filename, mode='r', format='NETCDF4', group=None,
+             writer=None, clobber=True, diskless=False, persist=False,
+             autoclose=False):
         if format is None:
             format = 'NETCDF4'
         opener = functools.partial(_open_netcdf4_group, filename, mode=mode,
                                    group=group, clobber=clobber,
                                    diskless=diskless, persist=persist,
                                    format=format)
-        self.ds = opener()
-        self._autoclose = autoclose
-        self._isopen = True
-        self.format = format
-        self.is_remote = is_remote_uri(filename)
-        self._filename = filename
-        self._mode = 'a' if mode == 'w' else mode
-        self._opener = functools.partial(opener, mode=self._mode)
-        super(NetCDF4DataStore, self).__init__(writer)
+        ds = opener()
+        return cls(ds, mode=mode, writer=writer, opener=opener,
+                   autoclose=autoclose)
 
     def open_store_variable(self, name, var):
         with self.ensure_open(autoclose=False):

Review thread on the removed self._mode / self._opener lines:

Member:
    The logic on these two lines with self._mode seems a little redundant /
    strange, but I'm concerned that it might be important for avoiding
    overwriting files when pickling a datastore or using autoclose. Can you
    restore it in __init__? I would rather tackle this clean-up in another PR.

Contributor Author:
    Done, although the functools.partial call on opener is conditional on
    opener existing, since opener is None in the case of an existing Dataset.
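A short sketch of the guard added at the top of the new __init__ (the file name is a placeholder): wrapping an existing handle with autoclose=True but no opener is rejected, since the store would have no way to reopen the file after auto-closing it.

```python
import netCDF4
from xarray.backends import NetCDF4DataStore

nc = netCDF4.Dataset('example.nc', mode='r')  # placeholder path

try:
    NetCDF4DataStore(nc, autoclose=True)  # no opener supplied
except ValueError as err:
    print(err)  # -> autoclose requires an opener
```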
xarray/tests/test_backends.py (+17, -5)

@@ -762,6 +762,18 @@ def test_0dimensional_variable(self):
         expected = Dataset({'x': ((), 123)})
         self.assertDatasetIdentical(expected, ds)
 
+    def test_already_open_dataset(self):
+        with create_tmp_file() as tmp_file:
+            with nc4.Dataset(tmp_file, mode='w') as nc:
+                v = nc.createVariable('x', 'int')
+                v[...] = 42
+
+            nc = nc4.Dataset(tmp_file, mode='r')
+            with backends.NetCDF4DataStore(nc, autoclose=False) as store:
+                with open_dataset(store) as ds:
+                    expected = Dataset({'x': ((), 42)})
+                    self.assertDatasetIdentical(expected, ds)
+
     def test_variable_len_strings(self):
         with create_tmp_file() as tmp_file:
             values = np.array(['foo', 'bar', 'baz'], dtype=object)
@@ -784,7 +796,7 @@ class NetCDF4DataTest(BaseNetCDF4Test, TestCase):
     @contextlib.contextmanager
     def create_store(self):
         with create_tmp_file() as tmp_file:
-            with backends.NetCDF4DataStore(tmp_file, mode='w') as store:
+            with backends.NetCDF4DataStore.open(tmp_file, mode='w') as store:
                 yield store
 
     @contextlib.contextmanager
@@ -972,8 +984,8 @@ class NetCDF3ViaNetCDF4DataTest(CFEncodedDataTest, Only32BitTypes, TestCase):
     @contextlib.contextmanager
     def create_store(self):
         with create_tmp_file() as tmp_file:
-            with backends.NetCDF4DataStore(tmp_file, mode='w',
-                                           format='NETCDF3_CLASSIC') as store:
+            with backends.NetCDF4DataStore.open(
+                    tmp_file, mode='w', format='NETCDF3_CLASSIC') as store:
                 yield store
 
@contextlib.contextmanager
Expand All @@ -998,8 +1010,8 @@ class NetCDF4ClassicViaNetCDF4DataTest(CFEncodedDataTest, Only32BitTypes,
     @contextlib.contextmanager
     def create_store(self):
         with create_tmp_file() as tmp_file:
-            with backends.NetCDF4DataStore(tmp_file, mode='w',
-                                           format='NETCDF4_CLASSIC') as store:
+            with backends.NetCDF4DataStore.open(
+                    tmp_file, mode='w', format='NETCDF4_CLASSIC') as store:
                 yield store
 
     @contextlib.contextmanager