From 25b0da49b3ddad3907bc24f8ac1b38109a56d153 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 19 Dec 2018 14:01:41 +0100 Subject: [PATCH 1/2] using xarray's h5netcdf engine for storing OPeNDAP results. hopefully fixes #823, #822, #818, #816, #783. --- cate/ds/esa_cci_odp.py | 11 ++++++++++- test/ds/test_esa_cci_odp.py | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index aeef7c62e..3f8ca8ca7 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -853,6 +853,7 @@ def _make_local(self, remote_dataset = xr.open_dataset(dataset_uri, drop_variables=[variable.get('name') for variable in excluded_variables]) + remote_dataset_root = remote_dataset child_monitor.progress(work=20, msg=f"Opened {dataset_uri}") if var_names: @@ -871,7 +872,13 @@ def _make_local(self, if compression_enabled: for sel_var_name in remote_dataset.variables.keys(): remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update) - remote_dataset.to_netcdf(local_filepath) + # Note: we are using engine='h5netcdf' here because the default engine='netcdf4' + # causes crashes in file "netCDF4/_netCDF4.pyx" with currently used netcdf4-1.4.2 conda + # package from conda-forge. This occurs whenever remote_dataset.to_netcdf() is called a + # second time in this loop. + # Probably related to https://github.com/pydata/xarray/issues/2560. + # And probably fixes Cate issues #823, #822, #818, #816, #783. + remote_dataset.to_netcdf(local_filepath, format='NETCDF4', engine='h5netcdf') child_monitor.progress(work=75, msg=f"Written {local_filepath}") if do_update_of_variables_meta_info_once: @@ -889,6 +896,8 @@ def _make_local(self, do_update_of_verified_time_coverage_start_once = False verified_time_coverage_end = time_coverage_end child_monitor.progress(work=5, msg=f"Added {local_filepath}") + + remote_dataset_root.close() else: outdated_file_list = [] for file_rec in selected_file_list: diff --git a/test/ds/test_esa_cci_odp.py b/test/ds/test_esa_cci_odp.py index 3563ea757..ab4026610 100644 --- a/test/ds/test_esa_cci_odp.py +++ b/test/ds/test_esa_cci_odp.py @@ -357,3 +357,17 @@ def test_make_local_and_update(self): self.assertEqual(str(download_stats), '48 of 64 MB @ 0.000 MB/s, 75.0% complete') download_stats.handle_chunk(16000000) self.assertEqual(str(download_stats), '64 of 64 MB @ 0.000 MB/s, 100.0% complete') + + +@unittest.skip(reason='Used for debugging to fix Cate issues #823, #822, #818, #816, #783') +class SpatialSubsetTest(unittest.TestCase): + + def test_make_local_spatial(self): + data_store = EsaCciOdpDataStore() + data_source = data_store.query(ds_id='esacci.SST.day.L4.SSTdepth.multi-sensor.multi-platform.OSTIA.1-1.r1')[0] + # The following always worked fine: + ds = data_source.open_dataset(time_range=['2010-01-01', '2010-01-04'], region='-10,40,20,70') + self.assertIsNotNone(ds) + # The following reproduced Cate issues #823, #822, #818, #816, #783: + ds = data_source.make_local('SST_DAY_L4', time_range=['2010-01-01', '2010-01-04'], region='-10,40,20,70') + self.assertIsNotNone(ds) From 2bcce657c193dce6300fcefca19a278ab02e24bf Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 19 Dec 2018 14:12:54 +0100 Subject: [PATCH 2/2] removed sub-progress messages --- cate/ds/esa_cci_odp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index 3f8ca8ca7..4b1ad46ff 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -854,7 +854,7 @@ def _make_local(self, drop_variables=[variable.get('name') for variable in excluded_variables]) remote_dataset_root = remote_dataset - child_monitor.progress(work=20, msg=f"Opened {dataset_uri}") + child_monitor.progress(work=20) if var_names: remote_dataset = remote_dataset.drop([var_name for var_name in remote_dataset.data_vars.keys() @@ -879,7 +879,7 @@ def _make_local(self, # Probably related to https://github.com/pydata/xarray/issues/2560. # And probably fixes Cate issues #823, #822, #818, #816, #783. remote_dataset.to_netcdf(local_filepath, format='NETCDF4', engine='h5netcdf') - child_monitor.progress(work=75, msg=f"Written {local_filepath}") + child_monitor.progress(work=75) if do_update_of_variables_meta_info_once: variables_info = local_ds.meta_info.get('variables', []) @@ -895,7 +895,7 @@ def _make_local(self, verified_time_coverage_start = time_coverage_start do_update_of_verified_time_coverage_start_once = False verified_time_coverage_end = time_coverage_end - child_monitor.progress(work=5, msg=f"Added {local_filepath}") + child_monitor.progress(work=5) remote_dataset_root.close() else: