diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index aeef7c62e..4b1ad46ff 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -853,7 +853,8 @@ def _make_local(self, remote_dataset = xr.open_dataset(dataset_uri, drop_variables=[variable.get('name') for variable in excluded_variables]) - child_monitor.progress(work=20, msg=f"Opened {dataset_uri}") + remote_dataset_root = remote_dataset + child_monitor.progress(work=20) if var_names: remote_dataset = remote_dataset.drop([var_name for var_name in remote_dataset.data_vars.keys() @@ -871,8 +872,14 @@ def _make_local(self, if compression_enabled: for sel_var_name in remote_dataset.variables.keys(): remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update) - remote_dataset.to_netcdf(local_filepath) - child_monitor.progress(work=75, msg=f"Written {local_filepath}") + # Note: we are using engine='h5netcdf' here because the default engine='netcdf4' + # causes crashes in file "netCDF4/_netCDF4.pyx" with currently used netcdf4-1.4.2 conda + # package from conda-forge. This occurs whenever remote_dataset.to_netcdf() is called a + # second time in this loop. + # Probably related to https://github.com/pydata/xarray/issues/2560. + # And probably fixes Cate issues #823, #822, #818, #816, #783. + remote_dataset.to_netcdf(local_filepath, format='NETCDF4', engine='h5netcdf') + child_monitor.progress(work=75) if do_update_of_variables_meta_info_once: variables_info = local_ds.meta_info.get('variables', []) @@ -888,7 +895,9 @@ def _make_local(self, verified_time_coverage_start = time_coverage_start do_update_of_verified_time_coverage_start_once = False verified_time_coverage_end = time_coverage_end - child_monitor.progress(work=5, msg=f"Added {local_filepath}") + child_monitor.progress(work=5) + + remote_dataset_root.close() else: outdated_file_list = [] for file_rec in selected_file_list: diff --git a/test/ds/test_esa_cci_odp.py b/test/ds/test_esa_cci_odp.py index 3563ea757..ab4026610 100644 --- a/test/ds/test_esa_cci_odp.py +++ b/test/ds/test_esa_cci_odp.py @@ -357,3 +357,17 @@ def test_make_local_and_update(self): self.assertEqual(str(download_stats), '48 of 64 MB @ 0.000 MB/s, 75.0% complete') download_stats.handle_chunk(16000000) self.assertEqual(str(download_stats), '64 of 64 MB @ 0.000 MB/s, 100.0% complete') + + +@unittest.skip(reason='Used for debugging to fix Cate issues #823, #822, #818, #816, #783') +class SpatialSubsetTest(unittest.TestCase): + + def test_make_local_spatial(self): + data_store = EsaCciOdpDataStore() + data_source = data_store.query(ds_id='esacci.SST.day.L4.SSTdepth.multi-sensor.multi-platform.OSTIA.1-1.r1')[0] + # The following always worked fine: + ds = data_source.open_dataset(time_range=['2010-01-01', '2010-01-04'], region='-10,40,20,70') + self.assertIsNotNone(ds) + # The following reproduced Cate issues #823, #822, #818, #816, #783: + ds = data_source.make_local('SST_DAY_L4', time_range=['2010-01-01', '2010-01-04'], region='-10,40,20,70') + self.assertIsNotNone(ds)