From 05940485329a31770ad73686da52503187af81b9 Mon Sep 17 00:00:00 2001
From: "C.-W. HSU" <11950284+chiaweh2@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:32:16 -0600
Subject: [PATCH 01/20] testing workflow

---
 mom6/notebook/gulf_stream_index.ipynb | 389 +-------------------------
 1 file changed, 3 insertions(+), 386 deletions(-)

diff --git a/mom6/notebook/gulf_stream_index.ipynb b/mom6/notebook/gulf_stream_index.ipynb
index 27ccb15..372efc0 100644
--- a/mom6/notebook/gulf_stream_index.ipynb
+++ b/mom6/notebook/gulf_stream_index.ipynb
@@ -3016,395 +3016,12 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 29,
+    "execution_count": 30,
     "id": "42a9084a",
     "metadata": {},
-    "outputs": [
-     {
-      "data": {
-       "text/html": [
-        "<pre>&lt;xarray.DataArray 'ssh' ()&gt; Size: 4B\n",
-        "array(2.3547514e-08, dtype=float32)\n",
-        "Attributes:\n",
-        "    regrid_method:  bilinear</pre>"
" - ], - "text/plain": [ - " Size: 4B\n", - "array(2.3547514e-08, dtype=float32)\n", - "Attributes:\n", - " regrid_method: bilinear" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "da_gs_index.mean().compute()" + "da_gs_index.mean().compute()\n" ] }, { From b3803accacaf4728a3740702056e461c9fe2fdab Mon Sep 17 00:00:00 2001 From: "C.-W. HSU" <11950284+chiaweh2@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:32:25 -0600 Subject: [PATCH 02/20] testin mean output --- mom6/notebook/gulf_stream_index.ipynb | 385 +++++++++++++++++++++++++- 1 file changed, 384 insertions(+), 1 deletion(-) diff --git a/mom6/notebook/gulf_stream_index.ipynb b/mom6/notebook/gulf_stream_index.ipynb index 372efc0..1073a83 100644 --- a/mom6/notebook/gulf_stream_index.ipynb +++ b/mom6/notebook/gulf_stream_index.ipynb @@ -3019,7 +3019,390 @@ "execution_count": 30, "id": "42a9084a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'ssh' ()> Size: 4B\n",
+       "array(2.3547514e-08, dtype=float32)\n",
+       "Attributes:\n",
+       "    regrid_method:  bilinear
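[Editor's note on the output above: the near-zero mean is expected, since the Gulf Stream index is a standardized anomaly, so its time mean vanishes up to float32 round-off. A minimal synthetic check of that arithmetic, not MOM6 data:]

    import numpy as np
    import xarray as xr

    # synthetic "SSH anomaly" series standing in for the real SSHa
    rng = np.random.default_rng(0)
    ssha = xr.DataArray(rng.normal(size=1000).astype('float32'), dims='time')
    ssha = ssha - ssha.mean('time')      # anomaly: time mean removed

    # standardized index, same construction as SSHa / SSHa_std
    index = ssha / ssha.std('time')
    print(index.mean().values)           # ~1e-08, i.e. zero at float32 precision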
" + ], + "text/plain": [ + " Size: 4B\n", + "array(2.3547514e-08, dtype=float32)\n", + "Attributes:\n", + " regrid_method: bilinear" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "da_gs_index.mean().compute()\n" ] From e0a462148f2e003b21a12538d7abece3f3a69fc0 Mon Sep 17 00:00:00 2001 From: "C.-W. HSU" <11950284+chiaweh2@users.noreply.github.com> Date: Wed, 17 Apr 2024 13:25:28 -0600 Subject: [PATCH 03/20] added bs4 for parsing html --- region_mom.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/region_mom.yml b/region_mom.yml index 4250e67..878f256 100644 --- a/region_mom.yml +++ b/region_mom.yml @@ -20,4 +20,5 @@ dependencies: - shapely - xarray - xesmf - - pytest \ No newline at end of file + - pytest + - beautifulsoup4 \ No newline at end of file From de6ec9f6c9f86a182d475e74ff154b66e659f551 Mon Sep 17 00:00:00 2001 From: "C.-W. HSU" <11950284+chiaweh2@users.noreply.github.com> Date: Wed, 17 Apr 2024 13:25:50 -0600 Subject: [PATCH 04/20] CICD --- .github/workflows/gha_pytest.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/gha_pytest.yml b/.github/workflows/gha_pytest.yml index a81686e..3515882 100644 --- a/.github/workflows/gha_pytest.yml +++ b/.github/workflows/gha_pytest.yml @@ -2,21 +2,18 @@ name: PR pytest checking on: pull_request: - branches: - - develop - - main - paths: - - './' + branches: + - develop jobs: checking_json: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout repo uses: actions/checkout@v4 - - name: Setup Micromamba ${{ matrix.python-version }} + - name: Setup Micromamba uses: mamba-org/setup-micromamba@v1 with: environment-file: regional_mom6.yml From 99383b138787519478311ebd483964c02897e0c1 Mon Sep 17 00:00:00 2001 From: "C.-W. HSU" <11950284+chiaweh2@users.noreply.github.com> Date: Wed, 17 Apr 2024 13:32:32 -0600 Subject: [PATCH 05/20] big change on seperation of task the input data is seperated and handled outside of the module --- mom6/mom6_module/mom6_indexes.py | 84 ++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/mom6/mom6_module/mom6_indexes.py b/mom6/mom6_module/mom6_indexes.py index d5b395c..3dd9958 100644 --- a/mom6/mom6_module/mom6_indexes.py +++ b/mom6/mom6_module/mom6_indexes.py @@ -7,19 +7,11 @@ Indexes include: 1. Gulf stream index """ -from typing import ( - Literal, - List, - Union -) -import os + import warnings import numpy as np import xarray as xr import xesmf as xe -from mom6 import DATA_PATH -from mom6.mom6_module import mom6_process as mp - warnings.simplefilter("ignore") xr.set_options(keep_attrs=True) @@ -28,30 +20,30 @@ class GulfStreamIndex: """ The class is use to recreate the Gulf Stream Index calculation in detail. Original sources are [Ross et al., 2023](https://gmd.copernicus.org/articles/16/6943/2023/). - and [GFDL CEFI github repository](https://github.com/NOAA-GFDL/CEFI-regional-MOM6/blob/main/diagnostics/physics/ssh_eval.py). + and [GFDL CEFI github repository] + (https://github.com/NOAA-GFDL/CEFI-regional-MOM6/blob/main/diagnostics/physics/ssh_eval.py). """ def __init__( self, - data_type : Literal['forecast','historical'] = 'forecast', - grid : Literal['raw','regrid'] = 'regrid' + ds_data : xr.Dataset, + ssh_name : str = 'ssh' ) -> None: - """ - input for the class to determine what data the user - want the index to be calculated from. 
From de6ec9f6c9f86a182d475e74ff154b66e659f551 Mon Sep 17 00:00:00 2001
From: "C.-W. HSU" <11950284+chiaweh2@users.noreply.github.com>
Date: Wed, 17 Apr 2024 13:25:50 -0600
Subject: [PATCH 04/20] CICD

---
 .github/workflows/gha_pytest.yml | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/gha_pytest.yml b/.github/workflows/gha_pytest.yml
index a81686e..3515882 100644
--- a/.github/workflows/gha_pytest.yml
+++ b/.github/workflows/gha_pytest.yml
@@ -2,21 +2,18 @@ name: PR pytest checking

 on:
   pull_request:
-    branches:
-      - develop
-      - main
-    paths:
-      - './'
+    branches:
+      - develop

 jobs:
   checking_json:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     steps:

     - name: Checkout repo
       uses: actions/checkout@v4

-    - name: Setup Micromamba ${{ matrix.python-version }}
+    - name: Setup Micromamba
       uses: mamba-org/setup-micromamba@v1
       with:
         environment-file: regional_mom6.yml

From 99383b138787519478311ebd483964c02897e0c1 Mon Sep 17 00:00:00 2001
From: "C.-W. HSU" <11950284+chiaweh2@users.noreply.github.com>
Date: Wed, 17 Apr 2024 13:32:32 -0600
Subject: [PATCH 05/20] big change on separation of tasks

the input data is separated and handled outside of the module
---
 mom6/mom6_module/mom6_indexes.py | 84 ++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 37 deletions(-)

diff --git a/mom6/mom6_module/mom6_indexes.py b/mom6/mom6_module/mom6_indexes.py
index d5b395c..3dd9958 100644
--- a/mom6/mom6_module/mom6_indexes.py
+++ b/mom6/mom6_module/mom6_indexes.py
@@ -7,19 +7,11 @@ Indexes include:
 1. Gulf stream index

 """
-from typing import (
-    Literal,
-    List,
-    Union
-)
-import os
+
 import warnings
 import numpy as np
 import xarray as xr
 import xesmf as xe
-from mom6 import DATA_PATH
-from mom6.mom6_module import mom6_process as mp
-

 warnings.simplefilter("ignore")
 xr.set_options(keep_attrs=True)
@@ -28,30 +20,30 @@ class GulfStreamIndex:
     """
     The class is used to recreate the Gulf Stream Index calculation in detail.
     Original sources are [Ross et al., 2023](https://gmd.copernicus.org/articles/16/6943/2023/).
-    and [GFDL CEFI github repository](https://github.com/NOAA-GFDL/CEFI-regional-MOM6/blob/main/diagnostics/physics/ssh_eval.py).
+    and [GFDL CEFI github repository]
+    (https://github.com/NOAA-GFDL/CEFI-regional-MOM6/blob/main/diagnostics/physics/ssh_eval.py).

     """

     def __init__(
         self,
-        data_type : Literal['forecast','historical'] = 'forecast',
-        grid : Literal['raw','regrid'] = 'regrid'
+        ds_data : xr.Dataset,
+        ssh_name : str = 'ssh'
     ) -> None:
-        """
-        input for the class to determine what data the user
-        want the index to be calculated from.
+        """Specify the dataset the index is derived from.

         Parameters
         ----------
-        data_type : Literal['forecast','historical'], optional
-            This determine the data type the user want to use
-            to calculate the indexes, by default 'forecast'
-        grid : Literal['raw','regrid'], optional
-            This determine the type of grid solution the user
-            want to use to calculate the indexes, by default 'regrid'
+        ds_data : xr.Dataset
+            The sea level height dataset one wants to use to
+            derive the gulf stream index. The coordinates
+            must have the names "lon" and "lat" exactly
+        ssh_name : str
+            The sea level height variable name in the dataset
         """
-        self.data_type = data_type
-        self.grid = grid
+        self.dataset = ds_data
+        self.varname = ssh_name
+

     @staticmethod
     def __region_focus(
@@ -95,7 +87,7 @@ def __region_focus(
         xr.Dataset
             the regridded Dataset structure
         """
-
+
         # longitude coordinate change -180 180 to 0 360
         if lon_max < 0. :
@@ -120,27 +112,37 @@ def generate_index(
         self,
     ) -> xr.Dataset:
-
+        """Generate the gulf stream index
+
+        Returns
+        -------
+        xr.Dataset
+            dataset containing the gulf_stream_index
+            variables.
+        """
+
         # getting the dataset
-        if self.data_type in ['historical']:
-            ds_data = mp.MOM6Historical.get_mom6_all('ssh',grid=self.grid)
-        elif self.data_type in ['forecast']:
-            ds_data = mp.MOM6Forecast.get_mom6_all('ssh',grid=self.grid)
+        ds_data = self.dataset

         # change longitude range from -180 180 to 0 360
-        ds_data['geolon'] = ds_data['geolon']+360.
+        try:
+            lon = ds_data['lon'].data
+        except KeyError as e:
+            raise KeyError("Coordinates should have 'lon' and 'lat' with exact naming") from e
+        lon_ind = np.where(lon<0)
+        lon[lon_ind] += 360.
+        ds_data['lon'].data = lon
+        # ds_data = ds_data.sortby('lon')

         # Define Regridding data structure
         ds_regrid = self.__region_focus()

         # use xesmf to create regridder
-        if self.grid in ['raw']:
-            ds_data = ds_data.rename({'geolon':'lon','geolat':'lat'})
         regridder = xe.Regridder(ds_data, ds_regrid, "bilinear", unmapped_to_nan=True)

         # perform regrid for each field
         ds_regrid = xr.Dataset()
-        ds_regrid['ssh'] = regridder(ds_data['ssh'])
+        ds_regrid['ssh'] = regridder(ds_data[self.varname])

         # Calculate the Sea Surface Height (SSH) anomaly
         # We calculate the anomaly based on the monthly climatology.
@@ -163,17 +165,25 @@
         # Calculate the Gulf Stream Index
         # - use the maximum latitude index to find the SSH anomaly along the line shown above.
-        # - calculate the longitude mean of the SSH anomaly (time dependent)
+        # - calculate the longitude mean of the SSH anomaly (time dependent)
         #   $$\text{{SSHa}}$$
         # - calculate the standard deviation of the $\text{{SSHa}}$
         #   $$\text{{SSHa\_std}}$$
         # - calculate the index
         #   $$\text{{Gulf Stream Index}} = \frac{\text{{SSHa}}}{\text{{SSHa\_std}}}$$

-        da_ssh_mean_along_gs = da_regrid_anom.isel(lat=da_lat_ind_maxstd).mean('lon')
-        da_ssh_mean_std_along_gs = da_regrid_anom.isel(lat=da_lat_ind_maxstd).mean('lon').std('time')
+        da_ssh_mean_along_gs = (
+            da_regrid_anom
+            .isel(lat=da_lat_ind_maxstd)
+            .mean('lon')
+        )
+        da_ssh_mean_std_along_gs = (
+            da_regrid_anom
+            .isel(lat=da_lat_ind_maxstd)
+            .mean('lon')
+            .std('time')
+        )
         da_gs_index = da_ssh_mean_along_gs/da_ssh_mean_std_along_gs

         ds_gs = xr.Dataset()
         ds_gs['gulf_stream_index'] = da_gs_index
-
         return ds_gs
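[Editor's note: after this refactor the caller supplies the SSH dataset itself. A hedged usage sketch with synthetic data, not a real MOM6 file; it assumes the xesmf/xarray stack from region_mom.yml and that the import path follows the repo layout:]

    import numpy as np
    import pandas as pd
    import xarray as xr
    from mom6.mom6_module.mom6_indexes import GulfStreamIndex

    # synthetic monthly SSH on a regular lat/lon grid covering the Gulf Stream box
    time = pd.date_range('1993-01-01', periods=24, freq='MS')
    lat = np.arange(20., 50., 0.5)
    lon = np.arange(-80., -50., 0.5)   # -180..180 convention; the class shifts it to 0..360
    rng = np.random.default_rng(42)
    ds = xr.Dataset(
        {'ssh': (('time', 'lat', 'lon'),
                 rng.normal(size=(time.size, lat.size, lon.size)).astype('float32'))},
        coords={'time': time, 'lat': lat, 'lon': lon},
    )

    ds_gs = GulfStreamIndex(ds_data=ds, ssh_name='ssh').generate_index()
    print(ds_gs['gulf_stream_index'])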
HSU" <11950284+chiaweh2@users.noreply.github.com> Date: Wed, 17 Apr 2024 13:33:40 -0600 Subject: [PATCH 06/20] adding the opendap source this is included in the unittest so is implemented along with the index feature --- mom6/mom6_module/mom6_process.py | 275 ++++++++++++++++++++++++------- 1 file changed, 217 insertions(+), 58 deletions(-) diff --git a/mom6/mom6_module/mom6_process.py b/mom6/mom6_module/mom6_process.py index d779bb8..36d6469 100644 --- a/mom6/mom6_module/mom6_process.py +++ b/mom6/mom6_module/mom6_process.py @@ -13,7 +13,9 @@ import os import warnings from datetime import date +import requests from dateutil.relativedelta import relativedelta +from bs4 import BeautifulSoup import cftime import numpy as np import pandas as pd @@ -31,6 +33,94 @@ 'GOMEX','GSL','NGOMEX','SGOMEX','Antilles','Floridian' ] + + +class OpenDapStore: + """class to handle the OPeNDAP request + """ + def __init__( + self, + grid : Literal['raw','regrid'] = 'raw', + data_type : Literal['forecast','historical'] = 'historical' + ) -> None: + """ + input for the class to get the opendap data + + Parameters + ---------- + grid : Literal['raw','regrid'], optional + The data extracted should be the regridded result or + the original model grid (curvilinear), by default 'raw' + data_type : Literal['forecast','historical'], optional + This determine the data type the user want to use + to calculate the indexes, by default 'historical' + + """ + self.grid = grid + self.data_type = data_type + + + def get_catalog(self)-> list: + """Getting the cataloged files + + Returns + ------- + list + a list of url in the form of string that + provide the locations of the data when + accessing using opendap + + Raises + ------ + FileNotFoundError + When the files is empty that means the init setting + or code must have some incorrect pairing. Debug possibly + needed. 
+        """
+        # print(self.data_type)
+        if self.data_type == 'historical' :
+            datatype = 'hist_run'
+        elif self.data_type == 'forecast' :
+            datatype = 'hindcast'
+        # print(datatype)
+
+        if self.grid == 'raw' :
+            gridtype = ''
+        elif self.grid == 'regrid' :
+            gridtype = 'regrid'
+
+        catalog_url = (
+            'https://psl.noaa.gov/thredds/catalog/'+
+            f'Projects/CEFI/regional_mom6/{datatype}/{gridtype}/'
+        )
+        opendap_url = (
+            'https://psl.noaa.gov/thredds/dodsC/'+
+            f'Projects/CEFI/regional_mom6/{datatype}/{gridtype}/'
+        )
+
+        # Send a GET request to the URL
+        html_response = requests.get(catalog_url+'catalog.html', timeout=10)
+
+        # Parse the html response
+        soup = BeautifulSoup(html_response.text, 'html.parser')
+
+        # get all code tags in a tags in the "content" div
+        div_content = soup.find('div', class_='content')
+        a_tags = div_content.find_all('a')
+        all_file_list = [a_tag.find_all('code')[0].text for a_tag in a_tags]
+
+        # remove regrid files and directories
+        files = []
+        for file in all_file_list:
+            if 'bilinear' not in file:
+                if '.nc' in file:
+                    files.append(opendap_url+file)
+        if not files :
+            raise FileNotFoundError
+
+        return files
+
+
 class MOM6Forecast:
     """
     Class for various mom6 forecast related calculation
@@ -45,7 +135,8 @@ def __init__(
         self,
         iyear : int,
         imonth : int,
         var : str,
-        grid : Literal['raw','regrid'] = 'regrid'
+        grid : Literal['raw','regrid'] = 'raw',
+        source : Literal['local','opendap'] = 'local'
     ) -> None:
         """
         input for the class to get the individual forecast
@@ -61,12 +152,15 @@ def __init__(
         grid : Literal['raw','regrid'], optional
             The data extracted should be the regridded result or
             the original model grid (curvilinear), by default 'raw'
+        source : Literal['local','opendap'], optional
+            The source where to import the data, by default 'local'

         """
         self.var = var
         self.iyear = iyear
         self.imonth = imonth
         self.grid = grid
+        self.source = source

     def get_mom6(self) -> xr.Dataset:
         """
@@ -82,27 +176,37 @@ def get_mom6(self) -> xr.Dataset:
             Dataset object is lazily-loaded.
""" if self.grid == 'raw' : - # getting the forecast/hindcast data - mom6_dir = os.path.join(DATA_PATH,"hindcast/") - file_list = MOM6Misc.mom6_hindcast(mom6_dir) - - # static field - ds_static = MOM6Static.get_mom6_grid() - - # merge the static field with the variables + if self.source == 'local': + # getting the forecast/hindcast data + mom6_dir = os.path.join(DATA_PATH,"hindcast/") + file_list = MOM6Misc.mom6_hindcast(mom6_dir) + # static field + ds_static = MOM6Static.get_mom6_grid() + elif self.source == 'opendap': + file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog() + for file in file_list: + var_flag = 'static' in file + if var_flag : + ds_static = xr.open_dataset(file) + + # get individual file for file in file_list: #iyear_flag = f'i{self.iyear}' in file imon_flag = f'i{self.imonth}' in file var_flag = self.var in file if imon_flag and var_flag : - ds = xr.open_dataset(file).sel(init=f'{self.iyear}-{self.imonth}') + ds = xr.open_dataset(file).sel(init=f'{self.iyear}-{self.imonth}').compute() + # merge the static field with the variables ds = xr.merge([ds_static,ds]) elif self.grid == 'regrid': - # getting the forecast/hindcast data - mom6_dir = os.path.join(DATA_PATH,"hindcast/regrid/") - file_list = MOM6Misc.mom6_hindcast(mom6_dir) + if self.source == 'local': + # getting the forecast/hindcast data + mom6_dir = os.path.join(DATA_PATH,"hindcast/regrid/") + file_list = MOM6Misc.mom6_hindcast(mom6_dir) + elif self.source == 'opendap': + file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog() # read only the needed file for file in file_list: @@ -110,14 +214,15 @@ def get_mom6(self) -> xr.Dataset: imon_flag = f'i{self.imonth}' in file var_flag = self.var in file if imon_flag and var_flag : - ds = xr.open_dataset(file).sel(init=f'{self.iyear}-{self.imonth}') + ds = xr.open_dataset(file).sel(init=f'{self.iyear}-{self.imonth}').compute() return ds - + @staticmethod def get_mom6_all( var : str, - grid : Literal['raw','regrid'] = 'regrid' + grid : Literal['raw','regrid'] = 'raw', + source : Literal['local','opendap'] = 'local' ) -> xr.Dataset: """ Return the mom6 all rawgrid/regridded hindcast/forecast field @@ -131,6 +236,8 @@ def get_mom6_all( grid : Literal['raw','regrid'], optional The data extracted should be the regridded result or the original model grid (curvilinear), by default 'raw' + source : Literal['local','opendap'], optional + The source where to import the data, by default 'local' Returns ------- @@ -140,27 +247,45 @@ def get_mom6_all( Dataset object is lazily-loaded. 
""" if grid == 'raw' : - # getting the forecast/hindcast data - mom6_dir = os.path.join(DATA_PATH,"hindcast/") - file_list = MOM6Misc.mom6_hindcast(mom6_dir) + if source == 'local': + # getting the forecast/hindcast data + mom6_dir = os.path.join(DATA_PATH,"hindcast/") + file_list = MOM6Misc.mom6_hindcast(mom6_dir) + # static field + ds_static = MOM6Static.get_mom6_grid() + elif source == 'opendap': + file_list = OpenDapStore(grid=grid,data_type='forecast').get_catalog() + for file in file_list: + var_flag = 'static' in file + if var_flag : + ds_static = xr.open_dataset(file) file_read = [file for file in file_list if var in file] - # static field - ds_static = MOM6Static.get_mom6_grid() - # merge the static field with the variables - ds = xr.open_mfdataset(file_read,combine='nested',concat_dim='init').sortby('init') + ds = xr.open_mfdataset( + file_read, + combine='nested', + concat_dim='init', + chunks={'init': 4,'member':1,'lead':-1} + ).sortby('init') ds = xr.merge([ds_static,ds]) ds = ds.isel(init=slice(1,None)) # exclude the 1980 empty field due to merge elif grid == 'regrid': - # getting the forecast/hindcast data - mom6_dir = os.path.join(DATA_PATH,"hindcast/regrid/") - file_list = MOM6Misc.mom6_hindcast(mom6_dir) + if source == 'local': + # getting the forecast/hindcast data + mom6_dir = os.path.join(DATA_PATH,"hindcast/regrid/") + file_list = MOM6Misc.mom6_hindcast(mom6_dir) + elif source == 'opendap': + file_list = OpenDapStore(grid=grid,data_type='forecast').get_catalog() file_read = [file for file in file_list if var in file] - ds = xr.open_mfdataset(file_read,combine='nested',concat_dim='init').sortby('init') + ds = xr.open_mfdataset( + file_read,combine='nested', + concat_dim='init', + chunks={'init': 1,'member':1,'lead':1} + ).sortby('init') return ds @@ -393,12 +518,12 @@ def calculate_tercile_prob( # region_name : RegionalOptions = 'MAB' # ) -> xr.Dataset: # """ - # Based on regional averaged value of forecast/hindcast, + # Based on regional averaged value of forecast/hindcast, # use single initialization's normal distribution - # and pre-defined tercile value based on the long-term + # and pre-defined tercile value based on the long-term # statistic tercile value to find the probability of - # upper ,normal , and lower tercile - + # upper ,normal , and lower tercile + # It also find the largest probability in upper (positive), # normal (0), lower (negative) @@ -408,14 +533,14 @@ def calculate_tercile_prob( # The `lead_bin` used to binned the leading month result # ex: one can set `lead_bins = [0, 3, 6, 9, 12]` for four seasonal # mean. Default is no binning, lead_bins = None. - + # region_name : ({'MAB','GOM','SS','GB','SS_LME', # 'NEUS_LME','SEUS_LME','GOMEX','GSL','NGOMEX', # 'SGOMEX','Antilles','Floridian'), default: "MAB" # String indicating the regional abbreviation one want to perform # the regional averaged tercile calculation. 
-
+
     #     Returns
     #     -------
     #     xr.Dataset

 class MOM6Historical:
     """
     Class for various mom6 historical run related calculation
@@ -442,7 +567,8 @@ def __init__(
         year : int,
         month : int,
         day : int = 1,
-        grid : Literal['raw','regrid'] = 'regrid'
+        grid : Literal['raw','regrid'] = 'raw',
+        source : Literal['local','opendap'] = 'local'
     ) -> None:
         """
         input for getting the historical run data
@@ -460,6 +586,8 @@ def __init__(
         grid : Literal['raw','regrid'], optional
             The data extracted should be the regridded result or
             the original model grid (curvilinear), by default 'raw'
+        source : Literal['local','opendap'], optional
+            The source where to import the data, by default 'local'

         """
         self.var = var
@@ -467,6 +595,7 @@ def __init__(
         self.month = month
         self.day = day
         self.grid = grid
+        self.source = source

     def get_mom6(self) -> xr.Dataset:
         """
@@ -482,40 +611,50 @@ def get_mom6(self) -> xr.Dataset:
             Dataset object is lazily-loaded.
         """
         if self.grid == 'raw' :
-            # getting the forecast/hindcast data
-            mom6_dir = os.path.join(DATA_PATH,"hist_run/")
-            file_list = MOM6Misc.mom6_historical(mom6_dir)
-
-            # static field
-            ds_static = MOM6Static.get_mom6_grid()
+            if self.source == 'local':
+                # getting the forecast/hindcast data
+                mom6_dir = os.path.join(DATA_PATH,"hist_run/")
+                file_list = MOM6Misc.mom6_historical(mom6_dir)
+                # static field
+                ds_static = MOM6Static.get_mom6_grid()
+            elif self.source == 'opendap':
+                file_list = OpenDapStore(grid=self.grid,data_type='historical').get_catalog()
+                for file in file_list:
+                    var_flag = 'static' in file
+                    if var_flag :
+                        ds_static = xr.open_dataset(file)

             # merge the static field with the variables
             for file in file_list:
                 var_flag = self.var in file
                 if var_flag :
-                    ds = xr.open_dataset(file).sel(time=f'{self.year}-{self.month}')
-
+                    ds = xr.open_dataset(file).sel(time=f'{self.year}-{self.month}').compute()
+
             ds = xr.merge([ds_static,ds])
             # remove the first time index 1980 that exists after merging with static field
             ds = ds.sel(time=f'{self.year}-{self.month}')

         elif self.grid == 'regrid':
-            # getting the forecast/hindcast data
-            mom6_dir = os.path.join(DATA_PATH,"hist_run/regrid/")
-            file_list = MOM6Misc.mom6_historical(mom6_dir)
+            if self.source == 'local':
+                # getting the forecast/hindcast data
+                mom6_dir = os.path.join(DATA_PATH,"hist_run/regrid/")
+                file_list = MOM6Misc.mom6_historical(mom6_dir)
+            elif self.source == 'opendap':
+                file_list = OpenDapStore(grid=self.grid,data_type='historical').get_catalog()

             # read only the needed file
             for file in file_list:
                 var_flag = self.var in file
                 if var_flag :
-                    ds = xr.open_dataset(file).sel(time=f'{self.year}-{self.month}')
+                    ds = xr.open_dataset(file).sel(time=f'{self.year}-{self.month}').compute()

         return ds

     @staticmethod
     def get_mom6_all(
         var : str,
-        grid : Literal['raw','regrid'] = 'regrid'
+        grid : Literal['raw','regrid'] = 'raw',
+        source : Literal['local','opendap'] = 'local'
     ) -> xr.Dataset:
         """
         Return the mom6 all rawgrid/regridded historical run field
@@ -529,6 +668,8 @@ def get_mom6_all(
         grid : Literal['raw','regrid'], optional
             The data extracted should be the regridded result or
             the original model grid (curvilinear), by default 'raw'
+        source : Literal['local','opendap'], optional
+            The source where to import the data, by default 'local'

         Returns
         -------
         xr.Dataset
             The complete data field/dataset of the specific variable.
             Dataset object is lazily-loaded.
         """
         if grid == 'raw' :
-            # getting the forecast/hindcast data
-            mom6_dir = os.path.join(DATA_PATH,"hist_run/")
-            file_list = MOM6Misc.mom6_historical(mom6_dir)
+            if source == 'local':
+                # getting the historical run data
+                mom6_dir = os.path.join(DATA_PATH,"hist_run/")
+                file_list = MOM6Misc.mom6_historical(mom6_dir)
+                # static field
+                ds_static = MOM6Static.get_mom6_grid()
+            elif source == 'opendap':
+                file_list = OpenDapStore(grid=grid,data_type='historical').get_catalog()
+                for file in file_list:
+                    var_flag = 'static' in file
+                    if var_flag :
+                        ds_static = xr.open_dataset(file)

             file_read = [file for file in file_list if var in file]

-            # static field
-            ds_static = MOM6Static.get_mom6_grid()
-
             # merge the static field with the variables
-            ds = xr.open_mfdataset(file_read,combine='nested',concat_dim='time').sortby('time')
+            ds = xr.open_mfdataset(
+                file_read,combine='nested',
+                concat_dim='time',
+                chunks={'time': 100}
+            ).sortby('time')
             ds = xr.merge([ds_static,ds])
             ds = ds.isel(time=slice(1,None)) # exclude the 1980 empty field due to merge

         elif grid == 'regrid':
-            # getting the forecast/hindcast data
-            mom6_dir = os.path.join(DATA_PATH,"hindcast/regrid/")
-            file_list = MOM6Misc.mom6_historical(mom6_dir)
+            if source == 'local':
+                # getting the historical run data
+                mom6_dir = os.path.join(DATA_PATH,"hist_run/regrid/")
+                file_list = MOM6Misc.mom6_historical(mom6_dir)
+            elif source == 'opendap':
+                file_list = OpenDapStore(grid=grid,data_type='historical').get_catalog()

             file_read = [file for file in file_list if var in file]

-            ds = xr.open_mfdataset(file_read,combine='nested',concat_dim='time').sortby('time')
+            ds = xr.open_mfdataset(
+                file_read,
+                combine='nested',
+                concat_dim='time',
+                chunks={'time': 100}
+            ).sortby('time')

         return ds
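[Editor's note: a hedged sketch of what the new source='opendap' pathway enables; it assumes the PSL THREDDS server is reachable and that the import path follows the repo layout:]

    from mom6.mom6_module.mom6_process import OpenDapStore, MOM6Historical

    # list the OPeNDAP urls behind the raw-grid historical-run catalog
    urls = OpenDapStore(grid='raw', data_type='historical').get_catalog()
    print(urls[:3])

    # lazily open every regridded historical ssh file straight from the server
    ds_hist = MOM6Historical.get_mom6_all('ssh', grid='regrid', source='opendap')
    print(ds_hist)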
""" if grid == 'raw' : - # getting the forecast/hindcast data - mom6_dir = os.path.join(DATA_PATH,"hist_run/") - file_list = MOM6Misc.mom6_historical(mom6_dir) + if source == 'local': + # getting the historical run data + mom6_dir = os.path.join(DATA_PATH,"hist_run/") + file_list = MOM6Misc.mom6_historical(mom6_dir) + # static field + ds_static = MOM6Static.get_mom6_grid() + elif source == 'opendap': + file_list = OpenDapStore(grid=grid,data_type='historical').get_catalog() + for file in file_list: + var_flag = 'static' in file + if var_flag : + ds_static = xr.open_dataset(file) file_read = [file for file in file_list if var in file] - # static field - ds_static = MOM6Static.get_mom6_grid() - # merge the static field with the variables - ds = xr.open_mfdataset(file_read,combine='nested',concat_dim='time').sortby('time') + ds = xr.open_mfdataset( + file_read,combine='nested', + concat_dim='time', + chunks={'time': 100} + ).sortby('time') ds = xr.merge([ds_static,ds]) ds = ds.isel(time=slice(1,None)) # exclude the 1980 empty field due to merge elif grid == 'regrid': - # getting the forecast/hindcast data - mom6_dir = os.path.join(DATA_PATH,"hindcast/regrid/") - file_list = MOM6Misc.mom6_historical(mom6_dir) + if source == 'local': + # getting the historical run data + mom6_dir = os.path.join(DATA_PATH,"hist_run/regrid/") + file_list = MOM6Misc.mom6_historical(mom6_dir) + elif source == 'opendap': + file_list = OpenDapStore(grid=grid,data_type='historical').get_catalog() file_read = [file for file in file_list if var in file] - ds = xr.open_mfdataset(file_read,combine='nested',concat_dim='time').sortby('time') + ds = xr.open_mfdataset( + file_read, + combine='nested', + concat_dim='time', + chunks={'time': 100} + ).sortby('time') return ds From d15fbfc8169697470d0e56a71d6433ae0dd91106 Mon Sep 17 00:00:00 2001 From: "C.-W. HSU" <11950284+chiaweh2@users.noreply.github.com> Date: Wed, 17 Apr 2024 13:33:57 -0600 Subject: [PATCH 07/20] testing out the mocking datasolution --- mom6/notebook/gulf_stream_index.ipynb | 825 ++++++++++++++++++++++---- 1 file changed, 699 insertions(+), 126 deletions(-) diff --git a/mom6/notebook/gulf_stream_index.ipynb b/mom6/notebook/gulf_stream_index.ipynb index 1073a83..f7dab4e 100644 --- a/mom6/notebook/gulf_stream_index.ipynb +++ b/mom6/notebook/gulf_stream_index.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "48e504ff-196d-4f7e-8226-cf3265021e6e", "metadata": {}, "outputs": [], @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "b73f9d9b-b0fe-453c-a702-02041fcead2a", "metadata": {}, "outputs": [], @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "408d6f27", "metadata": {}, "outputs": [ @@ -441,21 +441,21 @@ " wet_c (yq, xq) float32 3MB ...\n", " ... 
     "    areacello_bu  (yq, xq) float32 3MB ...\n",
-    "    average_DT    (time) timedelta64[ns] 3kB dask.array<chunksize=(324,), meta=np.ndarray>\n",
-    "    average_T1    (time) datetime64[ns] 3kB dask.array<chunksize=(324,), meta=np.ndarray>\n",
-    "    average_T2    (time) datetime64[ns] 3kB dask.array<chunksize=(324,), meta=np.ndarray>\n",
-    "    ssh           (time, yh, xh) float32 849MB dask.array<chunksize=(22, 225, 206), meta=np.ndarray>\n",
-    "    time_bnds     (time, nv) datetime64[ns] 5kB dask.array<chunksize=(324, 2), meta=np.ndarray>\n",
+    "    average_DT    (time) timedelta64[ns] 3kB dask.array<chunksize=(100,), meta=np.ndarray>\n",
+    "    average_T1    (time) datetime64[ns] 3kB dask.array<chunksize=(100,), meta=np.ndarray>\n",
+    "    average_T2    (time) datetime64[ns] 3kB dask.array<chunksize=(100,), meta=np.ndarray>\n",
+    "    ssh           (time, yh, xh) float32 849MB dask.array<chunksize=(100, 225, 206), meta=np.ndarray>\n",
+    "    time_bnds     (time, nv) datetime64[ns] 5kB dask.array<chunksize=(100, 2), meta=np.ndarray>\n",
     "Attributes:\n",
     "    NumFilesInSet:  1\n",
     "    title:          NWA12_MOM6_v1.0\n",
     "    grid_type:      regular\n",
     "    grid_tile:      N/A