You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
In [1]: import numpy as np; import pandas as pd; import xarray as xr
In [2]: times = pd.date_range('2000', periods=3)
In [3]: da = xr.DataArray(times, dims=['a'], coords=[[1, 2, 3]], name='foo')
In [4]: da.encoding['_FillValue'] = 1.0e20
In [5]: da.encoding['dtype'] = np.dtype('float64')
In [6]: da.to_dataset().to_netcdf('test.nc')
---------------------------------------------------------------------------
OverflowError Traceback (most recent call last)
<ipython-input-6-cbc6b2cfdf9a> in <module>
----> 1 da.to_dataset().to_netcdf('test.nc')
~/Software/xarray/xarray/core/dataset.py in to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
1548 unlimited_dims=unlimited_dims,
1549 compute=compute,
-> 1550 invalid_netcdf=invalid_netcdf,
1551 )
1552
~/Software/xarray/xarray/backends/api.py in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
1071 # to be parallelized with dask
1072 dump_to_store(
-> 1073 dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
1074 )
1075 if autoclose:
~/Software/xarray/xarray/backends/api.py in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
1117 variables, attrs = encoder(variables, attrs)
1118
-> 1119 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
1120
1121
~/Software/xarray/xarray/backends/common.py in store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
291 writer = ArrayWriter()
292
--> 293 variables, attributes = self.encode(variables, attributes)
294
295 self.set_attributes(attributes)
~/Software/xarray/xarray/backends/common.py in encode(self, variables, attributes)
380 # All NetCDF files get CF encoded by default, without this attempting
381 # to write times, for example, would fail.
--> 382 variables, attributes = cf_encoder(variables, attributes)
383 variables = {k: self.encode_variable(v) for k, v in variables.items()}
384 attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}
~/Software/xarray/xarray/conventions.py in cf_encoder(variables, attributes)
758 _update_bounds_encoding(variables)
759
--> 760 new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()}
761
762 # Remove attrs from bounds variables (issue #2921)
~/Software/xarray/xarray/conventions.py in <dictcomp>(.0)
758 _update_bounds_encoding(variables)
759
--> 760 new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()}
761
762 # Remove attrs from bounds variables (issue #2921)
~/Software/xarray/xarray/conventions.py in encode_cf_variable(var, needs_copy, name)
248 variables.UnsignedIntegerCoder(),
249 ]:
--> 250 var = coder.encode(var, name=name)
251
252 # TODO(shoyer): convert all of these to use coders, too:
~/Software/xarray/xarray/coding/variables.py in encode(self, variable, name)
163 if fv is not None:
164 # Ensure _FillValue is cast to same dtype as data's
--> 165 encoding["_FillValue"] = data.dtype.type(fv)
166 fill_value = pop_to(encoding, attrs, "_FillValue", name=name)
167 if not pd.isnull(fill_value):
OverflowError: Python int too large to convert to C long
Expected Output
I think this should succeed in writing to a netCDF file (it worked in xarray 0.14.0 and earlier).
Problem Description
I think this (admittedly very subtle) issue was introduced in #3502. Essentially at the time data enters CFMaskCoder.encode it does not necessarily have the dtype it will ultimately be encoded with. In the case of this example, data has type int64, but when it will be stored in the netCDF file it will be a double-precision float.
A possible solution here might be to rely on encoding['dtype'] (if it exists) to determine the type to cast the encoding values for '_FillValue' and 'missing_value' to, instead of relying solely on data.dtype (maybe use that as a fallback).
MCVE Code Sample
Expected Output
I think this should succeed in writing to a netCDF file (it worked in xarray 0.14.0 and earlier).
Problem Description
I think this (admittedly very subtle) issue was introduced in #3502. Essentially at the time data enters CFMaskCoder.encode it does not necessarily have the dtype it will ultimately be encoded with. In the case of this example, data has type int64, but when it will be stored in the netCDF file it will be a double-precision float.
A possible solution here might be to rely on encoding['dtype'] (if it exists) to determine the type to cast the encoding values for '_FillValue' and 'missing_value' to, instead of relying solely on data.dtype (maybe use that as a fallback).
cc: @spencerahill
Output of xr.show_versions()
xarray: master
pandas: 0.25.3
numpy: 1.17.3
scipy: 1.3.2
netCDF4: 1.5.3
pydap: None
h5netcdf: None
h5py: None
Nio: None
zarr: None
cftime: 1.0.4.2
nc_time_axis: None
PseudoNetCDF: None
rasterio: None
cfgrib: None
iris: None
bottleneck: None
dask: 2.9.0
distributed: 2.9.0
matplotlib: 3.1.2
cartopy: None
seaborn: None
numbagg: None
setuptools: 42.0.2.post20191203
pip: 19.3.1
conda: None
pytest: 5.3.2
IPython: 7.10.1
sphinx: None
The text was updated successfully, but these errors were encountered: